diff --git a/tests/test_vlm_elliptic_coeffs.cpp b/tests/test_vlm_elliptic_coeffs.cpp index 02c91ed..4f3ea52 100644 --- a/tests/test_vlm_elliptic_coeffs.cpp +++ b/tests/test_vlm_elliptic_coeffs.cpp @@ -114,13 +114,18 @@ int main(int /*argc*/, char ** /*argv*/) { const std::vector backends = get_available_backends(); std::vector test_alphas = {0, 1, 2, 3, 4, 5, 10, 15}; std::transform(test_alphas.begin(), test_alphas.end(), test_alphas.begin(), to_radians); - - MeshGeom mesh_geom{}; + + const MeshIO mesh_io{"plot3d"}; + MeshGeom mesh_geom{}; // DEPRECATE auto solvers = tiny::make_combination(meshes, backends); for (const auto& [mesh_name, backend_name] : solvers) { std::printf(">>> MESH: %s | BACKEND: %s\n", mesh_name.get().c_str(), backend_name.get().c_str()); - mesh_io_read_file(mesh_name, &mesh_geom); - mesh_quarterchord(&mesh_geom); + + auto dims = mesh_io.get_dims(mesh_name); + // LinearVLM solver{backend_name, } + + mesh_io_read_file(mesh_name, &mesh_geom); // DEPRECATE + mesh_quarterchord(&mesh_geom); // DEPRECATE LinearVLM solver{create_backend(backend_name, &mesh_geom, 1)}; for (u64 i = 0; i < test_alphas.size(); i++) { diff --git a/vlm/backends/cpu/include/vlm_backend_cpu.hpp b/vlm/backends/cpu/include/vlm_backend_cpu.hpp index 325137e..5f1cf30 100644 --- a/vlm/backends/cpu/include/vlm_backend_cpu.hpp +++ b/vlm/backends/cpu/include/vlm_backend_cpu.hpp @@ -7,29 +7,28 @@ namespace vlm { class BackendCPU final : public Backend { public: - BackendCPU(MeshGeom* mesh, u64 timesteps); + BackendCPU(); ~BackendCPU(); - void reset() override; - void lhs_assemble() override; - void compute_rhs() override; - void add_wake_influence() override; - void wake_rollup(float dt) override; - void shed_gamma() override; - void lu_factor() override; - void lu_solve() override; - f32 compute_coefficient_cl(const FlowData& flow, const f32 area, const u64 j, const u64 n) override; - f32 compute_coefficient_unsteady_cl(const linalg::alias::float3& freestream, const SoA_3D_t& vel, f32 dt, const f32 area, const u64 j, const u64 n) override; - linalg::alias::float3 compute_coefficient_cm(const FlowData& flow, const f32 area, const f32 chord, const u64 j, const u64 n) override; - f32 compute_coefficient_cd(const FlowData& flow, const f32 area, const u64 j, const u64 n) override; - void compute_delta_gamma() override; - void set_velocities(const linalg::alias::float3& vel) override; - void set_velocities(const f32* vels) override; + void lhs_assemble(f32* lhs) override; + void rhs_assemble_velocities(f32* rhs, const f32* normals, const f32* velocities) override; + void rhs_assemble_wake_influence(f32* rhs, const f32* gamma) override; + void displace_wake_rollup(float dt, f32* rollup_vertices, f32* verts_wake, const f32* verts_wing, const f32* gamma) override; + // void displace_wing(const linalg::alias::float4x4& transform) override; + void displace_wing_and_shed(const std::vector& transform, f32* verts_wing, f32* verts_wake) override; + void gamma_shed(f32* gamma, f32* gamma_prev) override; + void gamma_delta(f32* gamma_delta, const f32* gamma) override; + void lu_factor(f32* lhs) override; + void lu_solve(f32* lhs, f32* rhs, f32* gamma) override; + + // Per mesh kernels + f32 coeff_steady_cl(const MeshParams& param, const f32* verts_wing, const f32* gamma_delta, const FlowData& flow, const f32 area, const u64 j, const u64 n) override; + f32 coeff_unsteady_cl(const MeshParams& param, const f32* verts_wing, const f32* gamma_delta, const f32* gamma, const f32* gamma_prev, const f32* local_velocities, const f32* areas, const f32* normals, const linalg::alias::float3& freestream, f32 dt, const f32 area, const u64 j, const u64 n) override; + linalg::alias::float3 coeff_steady_cm(const MeshParams& param, const FlowData& flow, const f32 area, const f32 chord, const u64 j, const u64 n) override; + f32 coeff_steady_cd(const MeshParams& param, const FlowData& flow, const f32 area, const u64 j, const u64 n) override; void mesh_metrics(const f32 alpha) override; - void mesh_move(const linalg::alias::float4x4& transform) override; - void update_wake(const linalg::alias::float3& freestream) override; - f32 mesh_mac(u64 j, u64 n) override; // mean chord - f32 mesh_area(const u64 i, const u64 j, const u64 m, const u64 n) override; // mean span + f32 mesh_mac(const MeshParams& param, const u64 j, const u64 n) override; + f32 mesh_area(const MeshParams& param, const u64 i, const u64 j, const u64 m, const u64 n) override; }; } // namespace vlm \ No newline at end of file diff --git a/vlm/backends/cpu/src/vlm_backend_cpu.cpp b/vlm/backends/cpu/src/vlm_backend_cpu.cpp index 10d0182..87e3326 100644 --- a/vlm/backends/cpu/src/vlm_backend_cpu.cpp +++ b/vlm/backends/cpu/src/vlm_backend_cpu.cpp @@ -22,67 +22,75 @@ using namespace vlm; using namespace linalg::ostream_overloads; -ispc::Mesh2 mesh_to_ispc(Mesh2* mesh) { - ispc::Mesh2 mesh_ispc; - mesh_ispc.nc = mesh->nc; - mesh_ispc.ns = mesh->ns; - mesh_ispc.nw = mesh->nw; - mesh_ispc.nwa = mesh->nwa; - mesh_ispc.vertices = mesh->vertices; - mesh_ispc.normals = mesh->normals; - mesh_ispc.colloc = mesh->colloc; - mesh_ispc.area = mesh->area; - return mesh_ispc; -} - -BackendCPU::~BackendCPU() = default; // Destructor definition - -BackendCPU::BackendCPU(MeshGeom* mesh, u64 timesteps) { - allocator.h_malloc = std::malloc; - allocator.d_malloc = std::malloc; - allocator.h_free = std::free; - allocator.d_free = std::free; - allocator.hh_memcpy = std::memcpy; - allocator.hd_memcpy = std::memcpy; - allocator.dh_memcpy = std::memcpy; - allocator.dd_memcpy = std::memcpy; - allocator.h_memset = std::memset; - allocator.d_memset = std::memset; - init(mesh, timesteps); - - d_solver_info = (i32*)allocator.d_malloc(sizeof(i32)); - d_solver_ipiv = (i32*)allocator.d_malloc(hd_mesh->nc*hd_mesh->ns*sizeof(i32)); -} - -void BackendCPU::reset() { - const u64 nb_panels_wing = hd_mesh->nc * hd_mesh->ns; - const u64 nb_vertices_wing = (hd_mesh->nc+1)*(hd_mesh->ns+1); - - - // std::fill(gamma.begin(), gamma.end(), 0.0f); - std::fill(hd_data->lhs, hd_data->lhs + nb_panels_wing*nb_panels_wing, 0.0f); // influence kernel is += - // std::fill(rhs.begin(), rhs.end(), 0.0f); - allocator.dd_memcpy(PTR_MESH_V(hd_mesh, 0, 0, 0), PTR_MESHGEOM_V(hd_mesh_geom, 0, 0, 0), nb_vertices_wing*sizeof(f32)); - allocator.dd_memcpy(PTR_MESH_V(hd_mesh, 0, 0, 1), PTR_MESHGEOM_V(hd_mesh_geom, 0, 0, 1), nb_vertices_wing*sizeof(f32)); - allocator.dd_memcpy(PTR_MESH_V(hd_mesh, 0, 0, 2), PTR_MESHGEOM_V(hd_mesh_geom, 0, 0, 2), nb_vertices_wing*sizeof(f32)); - - dd_mesh->nwa = 0; -} +const Allocator allocator_cpu{ + std::malloc, + std::malloc, + std::free, + std::free, + std::memcpy, + std::memcpy, + std::memcpy, + std::memcpy, + std::memset, + std::memset +}; + +BackendCPU::BackendCPU() : Backend(allocator_cpu) {} +BackendCPU::~BackendCPU() = default; + +// void BackendCPU::reset() { +// const u64 nb_panels_wing = hd_mesh->nc * hd_mesh->ns; +// const u64 nb_vertices_wing = (hd_mesh->nc+1)*(hd_mesh->ns+1); + + +// // std::fill(gamma.begin(), gamma.end(), 0.0f); +// std::fill(hd_data->lhs, hd_data->lhs + nb_panels_wing*nb_panels_wing, 0.0f); // influence kernel is += +// // std::fill(rhs.begin(), rhs.end(), 0.0f); +// allocator.dd_memcpy(PTR_MESH_V(hd_mesh, 0, 0, 0), PTR_MESHGEOM_V(hd_mesh_geom, 0, 0, 0), nb_vertices_wing*sizeof(f32)); +// allocator.dd_memcpy(PTR_MESH_V(hd_mesh, 0, 0, 1), PTR_MESHGEOM_V(hd_mesh_geom, 0, 0, 1), nb_vertices_wing*sizeof(f32)); +// allocator.dd_memcpy(PTR_MESH_V(hd_mesh, 0, 0, 2), PTR_MESHGEOM_V(hd_mesh_geom, 0, 0, 2), nb_vertices_wing*sizeof(f32)); + +// dd_mesh->nwa = 0; +// } -void BackendCPU::compute_delta_gamma() { - const u64 nc = dd_mesh->nc; - const u64 ns = dd_mesh->ns; - const u64 nw = dd_mesh->nw; - const u64 nwa = dd_mesh->nwa; +void BackendCPU::gamma_delta(f32* gamma_delta, const f32* gamma) { // const tiny::ScopedTimer timer("Delta Gamma"); - std::copy(dd_data->gamma, dd_data->gamma+ns, dd_data->delta_gamma); - // note: this is efficient as the memory is contiguous - for (u64 i = 1; i < nc; i++) { - for (u64 j = 0; j < ns; j++) { - dd_data->delta_gamma[i*ns + j] = dd_data->gamma[i*ns + j] - dd_data->gamma[(i-1)*ns + j]; - } - } + tf::Taskflow graph; + + auto begin = graph.placeholder(); + auto end = graph.placeholder(); + + for (const auto& p : prm) { + f32* p_gamma_delta = gamma_delta + p.poff; + const f32* p_gamma = gamma + p.poff; + tf::Task first_row = graph.emplace([&]{ + allocator.dd_memcpy(p_gamma_delta, p_gamma, p.ns * sizeof(f32)); + }); + tf::Task remaining_rows = graph.for_each_index((u64)1,p.nc, [&] (u64 b, u64 e) { + for (u64 i = b; i < e; i++) { + for (u64 j = 0; j < p.ns; j++) { + p_gamma_delta[i*p.ns + j] = p_gamma[i*p.ns + j] - p_gamma[(i-1)*p.ns + j]; + } + } + }); + + begin.precede(first_row); + begin.precede(remaining_rows); + first_row.precede(end); + remaining_rows.precede(end); + }; + + Executor::get().run(graph).wait(); + + // std::copy(gamma,gamma+ns, delta_gamma); + + // // note: this is efficient as the memory is contiguous + // for (u64 i = 1; i < nc; i++) { + // for (u64 j = 0; j < ns; j++) { + // delta_gamma[i*ns + j] = gamma[i*ns + j] - gamma[(i-1)*ns + j]; + // } + // } } void BackendCPU::lhs_assemble() { @@ -95,7 +103,7 @@ void BackendCPU::lhs_assemble() { const u64 zero = 0; const u64 end_wing = (nc - 1) * ns; - ispc::Mesh2 mesh = mesh_to_ispc(dd_mesh); + ispc::Mesh mesh = mesh_to_ispc(dd_mesh); tf::Taskflow taskflow; @@ -128,7 +136,7 @@ void BackendCPU::lhs_assemble() { Executor::get().run(taskflow).wait(); } -void BackendCPU::compute_rhs() { +void BackendCPU::rhs_assemble_velocities() { const tiny::ScopedTimer timer("RHS"); const u64 nc = dd_mesh->nc; const u64 ns = dd_mesh->ns; @@ -143,13 +151,13 @@ void BackendCPU::compute_rhs() { } } -void BackendCPU::add_wake_influence() { +void BackendCPU::rhs_assemble_wake_influence() { // const tiny::ScopedTimer timer("Wake Influence"); const u64 nc = dd_mesh->nc; const u64 ns = dd_mesh->ns; const u64 nw = dd_mesh->nw; - ispc::Mesh2 mesh = mesh_to_ispc(dd_mesh); + ispc::Mesh mesh = mesh_to_ispc(dd_mesh); tf::Taskflow taskflow; @@ -165,7 +173,7 @@ void BackendCPU::add_wake_influence() { Executor::get().run(taskflow).wait(); } -void BackendCPU::wake_rollup(float dt) { +void BackendCPU::displace_wake_rollup(float dt) { const tiny::ScopedTimer timer("Wake Rollup"); const u64 nc = dd_mesh->nc; @@ -180,7 +188,7 @@ void BackendCPU::wake_rollup(float dt) { const f32* ry = dd_data->rollup_vertices + (nc+nw+1)*(ns+1)*1; const f32* rz = dd_data->rollup_vertices + (nc+nw+1)*(ns+1)*2; - ispc::Mesh2 mesh = mesh_to_ispc(dd_mesh); + ispc::Mesh mesh = mesh_to_ispc(dd_mesh); tf::Taskflow taskflow; @@ -201,7 +209,7 @@ void BackendCPU::wake_rollup(float dt) { Executor::get().run(taskflow).wait(); } -void BackendCPU::shed_gamma() { +void BackendCPU::gamma_shed() { // const tiny::ScopedTimer timer("Shed Gamma"); const u64 nc = dd_mesh->nc; const u64 ns = dd_mesh->ns; @@ -230,7 +238,7 @@ void BackendCPU::lu_solve() { LAPACKE_sgetrs(LAPACK_COL_MAJOR, 'N', n, 1, dd_data->lhs, n, d_solver_ipiv, dd_data->gamma, n); } -f32 BackendCPU::compute_coefficient_cl(const FlowData& flow, const f32 area, const u64 j, const u64 n) { +f32 BackendCPU::coeff_steady_cl(const FlowData& flow, const f32 area, const u64 j, const u64 n) { // const tiny::ScopedTimer timer("Compute CL"); const u64 nc = dd_mesh->nc; @@ -264,7 +272,7 @@ f32 BackendCPU::compute_coefficient_cl(const FlowData& flow, const f32 area, con return cl; } -f32 BackendCPU::compute_coefficient_unsteady_cl(const linalg::alias::float3& freestream, const SoA_3D_t& vel, f32 dt, const f32 area, const u64 j, const u64 n) { +f32 BackendCPU::coeff_unsteady_cl(const linalg::alias::float3& freestream, const SoA_3D_t& vel, f32 dt, const f32 area, const u64 j, const u64 n) { assert(n > 0); assert(j >= 0 && j+n <= dd_mesh->ns); @@ -315,7 +323,7 @@ f32 BackendCPU::compute_coefficient_unsteady_cl(const linalg::alias::float3& fre return cl; } -linalg::alias::float3 BackendCPU::compute_coefficient_cm( +linalg::alias::float3 BackendCPU::coeff_steady_cm( const FlowData& flow, const f32 area, const f32 chord, @@ -352,7 +360,7 @@ linalg::alias::float3 BackendCPU::compute_coefficient_cm( return cm; } -f32 BackendCPU::compute_coefficient_cd( +f32 BackendCPU::coeff_steady_cd( const FlowData& flow, const f32 area, const u64 j, @@ -362,7 +370,7 @@ f32 BackendCPU::compute_coefficient_cd( assert(n > 0); assert(j >= 0 && j+n <= dd_mesh->ns); // std::fill(dd_data->trefftz_buffer, dd_data->trefftz_buffer + hd_mesh->ns, 0.0f); - ispc::Mesh2 mesh = mesh_to_ispc(dd_mesh); + ispc::Mesh mesh = mesh_to_ispc(dd_mesh); f32 cd = ispc::kernel_trefftz_cd2(&mesh, dd_data->gamma, j, n, sigma_vatistas); cd /= linalg::length2(flow.freestream) * area; return cd; @@ -519,7 +527,7 @@ f32 BackendCPU::mesh_area(const u64 i, const u64 j, const u64 m, const u64 n) { return area_sum; } -void BackendCPU::mesh_move(const linalg::alias::float4x4& transform) { +void BackendCPU::displace_wing_and_shed(const linalg::alias::float4x4& transform) { // const tiny::ScopedTimer t("Mesh::move"); assert(dd_mesh->nwa < dd_mesh->nw); // check if we have capacity diff --git a/vlm/backends/cpu/src/vlm_backend_cpu_kernels.ispc b/vlm/backends/cpu/src/vlm_backend_cpu_kernels.ispc index d4d2df4..51179c2 100644 --- a/vlm/backends/cpu/src/vlm_backend_cpu_kernels.ispc +++ b/vlm/backends/cpu/src/vlm_backend_cpu_kernels.ispc @@ -44,7 +44,7 @@ inline F3 quad_normal(const F3& v0, const F3& v1, const F3& v2, const F3& v3) { return normalize(cross(v3-v1, v2-v0)); } -export struct Mesh2 { +export struct Mesh { float* vertices; // (nc+nw+1)*(ns+1)*3 // TODO: evaluate if we really need those values or we can just compute on the fly in the kernels float* normals; // nc*ns*3 @@ -109,7 +109,7 @@ inline void kernel_symmetry(float3& inf, C colloc, const V& vertex0, const V& ve } export void kernel_influence( - uniform const Mesh2* uniform m, + uniform const Mesh* uniform m, uniform float* uniform lhs, uniform uint64 ia, uniform uint64 lidx, uniform float sigma ) { @@ -153,7 +153,7 @@ export void kernel_influence( } export void kernel_wake_influence( - uniform const Mesh2* uniform m, + uniform const Mesh* uniform m, uniform uint64 lidx, uniform float* uniform gamma, uniform float* uniform rhs, uniform float sigma ) { const uniform uint64 nb_panels = m->ns * m->nc; @@ -199,7 +199,7 @@ export void kernel_wake_influence( } export void kernel_rollup( - uniform const Mesh2* uniform m, uniform float dt, + uniform const Mesh* uniform m, uniform float dt, uniform float* uniform rollup, uniform uint64 vidx, uniform float* uniform gamma, uniform float sigma ) { @@ -269,7 +269,7 @@ export void kernel_rollup( } // export uniform float kernel_trefftz_cd( -// uniform const Mesh2* uniform m, +// uniform const Mesh* uniform m, // uniform float* uniform gamma, // uniform float* uniform trefftz_buffer, // uniform uint64 j, uniform uint64 n, uniform float sigma @@ -325,7 +325,7 @@ export void kernel_rollup( // } export uniform float kernel_trefftz_cd2( - uniform const Mesh2* uniform m, + uniform const Mesh* uniform m, uniform float* uniform gamma, uniform uint64 jp, uniform uint64 n, uniform float sigma ) { @@ -382,7 +382,7 @@ export uniform float kernel_trefftz_cd2( } export uniform float kernel_trefftz_cl( - uniform const Mesh2* uniform m, + uniform const Mesh* uniform m, uniform float* uniform gamma, uniform uint64 j, uniform uint64 n ) { diff --git a/vlm/include/vlm.hpp b/vlm/include/vlm.hpp index 194c590..bb8ce00 100644 --- a/vlm/include/vlm.hpp +++ b/vlm/include/vlm.hpp @@ -8,77 +8,123 @@ namespace vlm { -class Solver { +class Simulation { public: std::unique_ptr backend; - Solver(const tiny::Config& cfg); - Solver(std::unique_ptr&& backend_) : backend(std::move(backend_)) {}; - virtual ~Solver() = default; -}; + Mesh2 mesh; -class NonLinearVLM final: public Solver { - public: - static constexpr f64 DEFAULT_TOL = 1e-5; - static constexpr u64 DEFAULT_MAX_ITER = 100; - - const f64 tol; - const u64 max_iter; - f32* strip_alphas = nullptr; // ns - f32* velocities = nullptr; // nc*ns*3 - - NonLinearVLM( // init from cfg - const tiny::Config& cfg, - const f64 tol_= DEFAULT_TOL, - const u64 max_iter_ = DEFAULT_MAX_ITER - ): - Solver(cfg), - tol(tol_), - max_iter(max_iter_) - { - alloc(); - } - NonLinearVLM( // init from backend - std::unique_ptr&& backend_, - const f64 tol_= DEFAULT_TOL, - const u64 max_iter_ = DEFAULT_MAX_ITER - ) : - Solver(std::move(backend_)), - tol(tol_), - max_iter(max_iter_) - { - alloc(); - } - ~NonLinearVLM() { - dealloc(); - }; - AeroCoefficients solve(const FlowData& flow, const Database& db); + Simulation(const std::string& backend_name, const std::vector& meshes) : backend(create_backend(backend_name)), mesh(backend->allocator) { + // Read the sizes of all the meshes + { + u64 off_wing_p = 0; + u64 off_wing_v = 0; + for (const auto& m_name : meshes) { + const MeshIO mesh_io{"plot3d"}; // TODO, infer from mesh_name + auto [nc,ns] = mesh_io.get_dims(m_name); + mesh.params.emplace_back(MeshParams{nc, ns, 0, 0, off_wing_p, off_wing_v, 0, 0, true}); + off_wing_p += mesh.params.back().nb_panels_wing(); + off_wing_v += mesh.params.back().nb_vertices_wing(); + } + } + mesh.alloc_wing(); - private: - void alloc() { - strip_alphas = (f32*)backend->allocator.h_malloc(backend->hd_mesh->ns * sizeof(f32)); - velocities = (f32*)backend->allocator.h_malloc(backend->hd_mesh->nc * backend->hd_mesh->ns * 3 * sizeof(f32)); - } - void dealloc() { - backend->allocator.h_free(strip_alphas); - backend->allocator.h_free(velocities); - } + // Perform the actual read of the mesh files + for (u64 i = 0; i < meshes.size(); i++) { + const MeshIO mesh_io{"plot3d"}; // TODO, infer from mesh_name + mesh_io.read(meshes[i], mesh.verts_wing_init.h_ptr() + mesh.params[i].off_wing_p); + } + }; + virtual ~Simulation() = default; }; -class LinearVLM final: public Solver { +class VLM final: public Simulation { public: - LinearVLM(const tiny::Config& cfg): Solver(cfg) {} - LinearVLM(std::unique_ptr&& backend_): Solver(std::move(backend_)) {}; - ~LinearVLM() = default; - AeroCoefficients solve(const FlowData& flow); + VLM(const std::string& backend_name, const std::vector& meshes); + ~VLM() = default; + AeroCoefficients run(const FlowData& flow); + private: + void alloc_buffers(); + Buffer lhs; // (ns*nc)^2 + Buffer rhs; // ns*nc + Buffer gamma_wing; // nc*ns + Buffer gamma_wake; // nw*ns + Buffer gamma_wing_prev; // nc*ns + Buffer gamma_wing_delta; // nc*ns + Buffer local_velocities; // ns*nc*3 }; -class UVLM final: public Solver { - public: +// class Solver { +// public: +// std::unique_ptr backend; +// Solver(const tiny::Config& cfg); +// Solver(std::unique_ptr&& backend_) : backend(std::move(backend_)) {}; +// virtual ~Solver() = default; +// }; - UVLM(const tiny::Config& cfg): Solver(cfg) {} - UVLM(std::unique_ptr&& backend_): Solver(std::move(backend_)) {}; - ~UVLM() = default; - AeroCoefficients solve(const FlowData& flow); -}; +// class NonLinearVLM final: public Solver { +// public: +// static constexpr f64 DEFAULT_TOL = 1e-5; +// static constexpr u64 DEFAULT_MAX_ITER = 100; + +// const f64 tol; +// const u64 max_iter; +// f32* strip_alphas = nullptr; // ns +// f32* velocities = nullptr; // nc*ns*3 + +// NonLinearVLM( // init from cfg +// const tiny::Config& cfg, +// const f64 tol_= DEFAULT_TOL, +// const u64 max_iter_ = DEFAULT_MAX_ITER +// ): +// Solver(cfg), +// tol(tol_), +// max_iter(max_iter_) +// { +// alloc(); +// } +// NonLinearVLM( // init from backend +// std::unique_ptr&& backend_, +// const f64 tol_= DEFAULT_TOL, +// const u64 max_iter_ = DEFAULT_MAX_ITER +// ) : +// Solver(std::move(backend_)), +// tol(tol_), +// max_iter(max_iter_) +// { +// alloc(); +// } +// ~NonLinearVLM() { +// dealloc(); +// }; +// AeroCoefficients solve(const FlowData& flow, const Database& db); + +// private: +// void alloc() { +// strip_alphas = (f32*)backend->allocator.h_malloc(backend->hd_mesh->ns * sizeof(f32)); +// velocities = (f32*)backend->allocator.h_malloc(backend->hd_mesh->nc * backend->hd_mesh->ns * 3 * sizeof(f32)); +// } +// void dealloc() { +// backend->allocator.h_free(strip_alphas); +// backend->allocator.h_free(velocities); +// } +// }; + +// class LinearVLM final: public Solver { +// public: +// LinearVLM(const tiny::Config& cfg): Solver(cfg) {} +// LinearVLM(std::unique_ptr&& backend_): Solver(std::move(backend_)) {}; +// ~LinearVLM() = default; +// AeroCoefficients solve(const FlowData& flow); +// }; + + +// class UVLM final: public Solver { +// public: + +// UVLM(const tiny::Config& cfg): Solver(cfg) {} +// UVLM(std::unique_ptr&& backend_): Solver(std::move(backend_)) {}; +// ~UVLM() = default; +// AeroCoefficients solve(const FlowData& flow); +// }; } // namespace vlm diff --git a/vlm/include/vlm_allocator.hpp b/vlm/include/vlm_allocator.hpp index 3ac1e88..c91c555 100644 --- a/vlm/include/vlm_allocator.hpp +++ b/vlm/include/vlm_allocator.hpp @@ -1,23 +1,212 @@ #pragma once +#include +#include +#include + namespace vlm { -typedef void* (*malloc_f)(size_t size); -typedef void (*free_f)(void* ptr); -typedef void* (*memcpy_f)(void* dst, const void* src, size_t size); -typedef void* (*memset_f)(void* dst, int value, size_t size); +using malloc_f = void* (*)(std::size_t size); +using free_f = void (*)(void* ptr); +using memcpy_f = void* (*)(void* dst, const void* src, std::size_t size); +using memset_f = void* (*)(void* dst, int value, std::size_t size); struct Allocator { - malloc_f h_malloc; - malloc_f d_malloc; - free_f h_free; - free_f d_free; - memcpy_f hh_memcpy; - memcpy_f hd_memcpy; - memcpy_f dh_memcpy; - memcpy_f dd_memcpy; - memset_f h_memset; - memset_f d_memset; + const malloc_f h_malloc; + const malloc_f d_malloc; + const free_f h_free; + const free_f d_free; + const memcpy_f hh_memcpy; + const memcpy_f hd_memcpy; + const memcpy_f dh_memcpy; + const memcpy_f dd_memcpy; + const memset_f h_memset; + const memset_f d_memset; + + Allocator() = delete; + Allocator( + const malloc_f h_malloc, + const malloc_f d_malloc, + const free_f h_free, + const free_f d_free, + const memcpy_f hh_memcpy, + const memcpy_f hd_memcpy, + const memcpy_f dh_memcpy, + const memcpy_f dd_memcpy, + const memset_f h_memset, + const memset_f d_memset + ) : h_malloc(h_malloc), d_malloc(d_malloc), h_free(h_free), d_free(d_free), + hh_memcpy(hh_memcpy), hd_memcpy(hd_memcpy), dh_memcpy(dh_memcpy), dd_memcpy(dd_memcpy), + h_memset(h_memset), d_memset(d_memset) {} +}; + +struct Host {}; +struct Device {}; +struct HostDevice {}; + +// template +// class Buffer { +// static_assert(std::is_same_v || std::is_same_v || std::is_same_v, "Invalid Location type"); +// using is_host = std::bool_constant || std::is_same_v>; +// using is_device = std::bool_constant || std::is_same_v>; + +// public: +// explicit Buffer(Allocator* allocator, std::size_t n) : allocator(allocator) { alloc(n); } +// explicit Buffer() = default; +// ~Buffer() { dealloc(); } + +// T* h_ptr() { +// static_assert(is_host::value); +// return _host; +// } +// const T* h_ptr() const { +// static_assert(is_host::value); +// return _host; +// } +// T* d_ptr() { +// static_assert(is_device::value); +// return _device; +// } +// const T* d_ptr() const { +// static_assert(is_device::value); +// return _device; +// } + +// void to_device() { +// static_assert(is_host::value && is_device::value); +// if (allocator->d_malloc != allocator->h_malloc) +// allocator->hd_memcpy(_device, static_cast(_host), _size * sizeof(T)); +// } + +// void to_host() { +// static_assert(is_host::value && is_device::value); +// if (allocator->d_malloc != allocator->h_malloc) +// allocator->dh_memcpy(_host, static_cast(_device), _size * sizeof(T)); +// } + +// void set_allocator(Allocator* allocator) { +// this->allocator = allocator; +// } + +// private: +// // 32 bytes max +// Allocator* allocator = nullptr; +// T* _host = nullptr; +// T* _device = nullptr; +// std::size_t _size = 0; + +// void alloc(std::size_t n) { +// assert(allocator); +// _size = n; +// if constexpr (is_host::value) { +// _host = static_cast(allocator->h_malloc(_size * sizeof(T))); +// } +// if constexpr (is_device::value) { +// if constexpr (!is_host::value) assert(allocator->d_malloc != allocator->h_malloc); +// _device = (allocator->d_malloc != allocator->h_malloc) +// ? static_cast(allocator->d_malloc(_size * sizeof(T))) +// : _host; +// } +// } + +// void dealloc() { +// if (allocator) { +// if constexpr (is_host::value) { +// allocator->h_free(_host); +// } +// if constexpr (is_device::value) { +// if (allocator->d_malloc != allocator->h_malloc) allocator->d_free(_device); +// } +// } +// } + +// Buffer(Buffer&&) = delete; +// Buffer(const Buffer&) = delete; +// Buffer& operator=(Buffer&&) = delete; +// Buffer& operator=(const Buffer&) = delete; +// }; + +template +class Buffer { + static_assert(std::is_same_v || std::is_same_v || std::is_same_v, "Invalid Location type"); + using is_host = std::bool_constant || std::is_same_v>; + using is_device = std::bool_constant || std::is_same_v>; + +public: + Buffer() = delete; + explicit Buffer(const Allocator& allocator, std::size_t n) : allocator(allocator) { alloc(n); } + explicit Buffer(const Allocator& allocator) : allocator(allocator) {} + ~Buffer() { dealloc(); } + + T* h_ptr() { + static_assert(is_host::value); + return _host; + } + const T* h_ptr() const { + static_assert(is_host::value); + return _host; + } + T* d_ptr() { + static_assert(is_device::value); + return _device; + } + const T* d_ptr() const { + static_assert(is_device::value); + return _device; + } + + void to_device() { + static_assert(is_host::value && is_device::value); + if (allocator.d_malloc != allocator.h_malloc) + allocator.hd_memcpy(_device, static_cast(_host), _size * sizeof(T)); + } + + void to_host() { + static_assert(is_host::value && is_device::value); + if (allocator.d_malloc != allocator.h_malloc) + allocator.dh_memcpy(_host, static_cast(_device), _size * sizeof(T)); + } + + void set_allocator(Allocator* allocator) { + this->allocator = allocator; + } + + std::size_t len() const { return _size; } + std::size_t size() const { return _size * sizeof(T); } + + void alloc(std::size_t n) { + _size = n; + if constexpr (is_host::value) { + _host = static_cast(allocator.h_malloc(_size * sizeof(T))); + } + if constexpr (is_device::value) { + if constexpr (!is_host::value) assert(allocator.d_malloc != allocator.h_malloc); + _device = (allocator.d_malloc != allocator.h_malloc) + ? static_cast(allocator.d_malloc(_size * sizeof(T))) + : _host; + } + } + + void dealloc() { + if constexpr (is_host::value) { + allocator.h_free(_host); + } + if constexpr (is_device::value) { + if (allocator.d_malloc != allocator.h_malloc) allocator.d_free(_device); + } + } + +private: + // 32 bytes max + const Allocator& allocator; + T* _host = nullptr; + T* _device = nullptr; + std::size_t _size = 0; + + Buffer(Buffer&&) = delete; + Buffer(const Buffer&) = delete; + Buffer& operator=(Buffer&&) = delete; + Buffer& operator=(const Buffer&) = delete; }; } // namespace vlm \ No newline at end of file diff --git a/vlm/include/vlm_backend.hpp b/vlm/include/vlm_backend.hpp index a818aab..fcd08b7 100644 --- a/vlm/include/vlm_backend.hpp +++ b/vlm/include/vlm_backend.hpp @@ -10,56 +10,41 @@ namespace vlm { class Backend { public: - Allocator allocator; - - // Constant inital position meshes (for kinematics) - MeshGeom* hh_mesh_geom; // borrowed ptr - MeshGeom* hd_mesh_geom; - MeshGeom* dd_mesh_geom; - - // Mutable meshes (temporal state) - // Mesh2* hh_mesh; // host ptr to host buffers for io - Mesh2* hd_mesh; // host ptr to device buffers - Mesh2* dd_mesh; // device ptr to device buffers for kernels - - Data* hd_data; - Data* dd_data; + const Allocator allocator; + std::vector prm; i32* d_solver_info = nullptr; i32* d_solver_ipiv = nullptr; f32* d_solver_buffer = nullptr; f32 sigma_vatistas = 0.0f; - Backend() = default; + Backend(const Allocator& allocator) : allocator(allocator) {} ~Backend(); - void init(MeshGeom* mesh_geom, u64 timesteps); // Acts as delayed constructor - virtual void reset() = 0; - virtual void lhs_assemble() = 0; - virtual void compute_rhs() = 0; - virtual void add_wake_influence() = 0; - virtual void wake_rollup(float dt) = 0; - virtual void shed_gamma() = 0; - virtual void lu_factor() = 0; - virtual void lu_solve() = 0; - virtual f32 compute_coefficient_cl(const FlowData& flow, const f32 area, const u64 j, const u64 n) = 0; - virtual f32 compute_coefficient_unsteady_cl(const linalg::alias::float3& freestream, const SoA_3D_t& vel, f32 dt, const f32 area, const u64 j, const u64 n) = 0; - f32 compute_coefficient_cl(const FlowData& flow); - virtual linalg::alias::float3 compute_coefficient_cm(const FlowData& flow, const f32 area, const f32 chord, const u64 j, const u64 n) = 0; - linalg::alias::float3 compute_coefficient_cm(const FlowData& flow); - virtual f32 compute_coefficient_cd(const FlowData& flow, const f32 area, const u64 j, const u64 n) = 0; - f32 compute_coefficient_cd(const FlowData& flow); - virtual void compute_delta_gamma() = 0; - virtual void set_velocities(const linalg::alias::float3& vel) = 0; - virtual void set_velocities(const f32* vels) = 0; + + // Kernels that run for all the meshes + virtual void lhs_assemble(f32* lhs) = 0; + virtual void rhs_assemble_velocities(f32* rhs, const f32* normals, const f32* velocities) = 0; + virtual void rhs_assemble_wake_influence(f32* rhs, const f32* gamma) = 0; + virtual void displace_wake_rollup(float dt, f32* rollup_vertices, f32* verts_wake, const f32* verts_wing, const f32* gamma) = 0; + // virtual void displace_wing(const linalg::alias::float4x4& transform) = 0; + virtual void displace_wing_and_shed(const std::vector& transform, f32* verts_wing, f32* verts_wake) = 0; + virtual void gamma_shed(f32* gamma, f32* gamma_prev) = 0; + virtual void gamma_delta(f32* gamma_delta, const f32* gamma) = 0; + virtual void lu_factor(f32* lhs) = 0; + virtual void lu_solve(f32* lhs, f32* rhs, f32* gamma) = 0; + + // Per mesh kernels + virtual f32 coeff_steady_cl(const MeshParams& param, const f32* verts_wing, const f32* gamma_delta, const FlowData& flow, const f32 area, const u64 j, const u64 n) = 0; + virtual f32 coeff_unsteady_cl(const MeshParams& param, const f32* verts_wing, const f32* gamma_delta, const f32* gamma, const f32* gamma_prev, const f32* local_velocities, const f32* areas, const f32* normals, const linalg::alias::float3& freestream, f32 dt, const f32 area, const u64 j, const u64 n) = 0; + virtual linalg::alias::float3 coeff_steady_cm(const MeshParams& param, const FlowData& flow, const f32 area, const f32 chord, const u64 j, const u64 n) = 0; + virtual f32 coeff_steady_cd(const MeshParams& param, const FlowData& flow, const f32 area, const u64 j, const u64 n) = 0; virtual void mesh_metrics(const f32 alpha) = 0; - virtual void mesh_move(const linalg::alias::float4x4& transform) = 0; - virtual void update_wake(const linalg::alias::float3& freestream) = 0; // TEMPORARY - virtual f32 mesh_mac(u64 j, u64 n) = 0; // mean chord - virtual f32 mesh_area(const u64 i, const u64 j, const u64 m, const u64 n) = 0; // mean span + virtual f32 mesh_mac(const MeshParams& param, const u64 j, const u64 n) = 0; + virtual f32 mesh_area(const MeshParams& param, const u64 i, const u64 j, const u64 m, const u64 n) = 0; }; -std::unique_ptr create_backend(const std::string& backend_name, MeshGeom* mesh, int timesteps); +std::unique_ptr create_backend(const std::string& backend_name); std::vector get_available_backends(); } // namespace vlm diff --git a/vlm/include/vlm_mesh.hpp b/vlm/include/vlm_mesh.hpp index cee8fea..a042382 100644 --- a/vlm/include/vlm_mesh.hpp +++ b/vlm/include/vlm_mesh.hpp @@ -1,11 +1,11 @@ #pragma once +#include + #include "vlm_types.hpp" #include "vlm_allocator.hpp" #include "tinyfwd.hpp" -#include "tinyview.hpp" - #define PTR_MESHGEOM_V(m, i,j,k) (m->vertices + (j) + (i) * (m->ns+1) + (k) * (m->nc+1) * (m->ns+1)) #define PTR_MESH_V(m, i,j,k) (m->vertices + (j) + (i) * (m->ns+1) + (k) * (m->nc+m->nw+1) * (m->ns+1)) @@ -14,138 +14,76 @@ namespace vlm { -// C interface +struct MeshParams { + u64 nc = 0; // nb of panels chordwise + u64 ns = 0; // nb of panels spanwise + u64 nw = 0; // nb of wake panels chordwise + u64 nwa = 0; // nb of acive wake panels chordwise + u64 off_wing_p = 0; // wing panel offset for buffers + u64 off_wing_v = 0; // wing vertex offset for buffers + u64 off_wake_p = 0; // wake panel offset for buffers + u64 off_wake_v = 0; // wake vertex offset for buffers + bool wake = true; // whether the surface produces wake + + inline u64 nb_panels_wing() {return nc * ns;} + inline u64 nb_panels_wake() {return nw * ns;} + inline u64 nb_vertices_wing() {return (nc+1) * (ns+1);} + inline u64 nb_vertices_wake() {return (nw+1) * (ns+1);} +}; + +// TODO: move functions into cpp file struct Mesh2 { - f32* vertices = nullptr; // (nc+nw+1)*(ns+1)*3 - //f32* verts_wake = nullptr; // (nw+1)*(ns+1)*3 - // TODO: evaluate if we really need those values or we can just compute on the fly in the kernels - f32* normals = nullptr; // nc*ns*3 - f32* colloc = nullptr; // nc*ns*3 - f32* area = nullptr; // nc*ns + Buffer verts_wing_init; // (nc+1)*(ns+1)*3 + Buffer verts_wing; // (nc+1)*(ns+1)*3 + Buffer verts_wake; // (nw+1)*(ns+1)*3 + Buffer normals; // nc*ns*3 + Buffer colloc; // nc*ns*3 + Buffer area; // nc*ns - u64 nc = 0; // number of wing panels chordwise - u64 ns = 0; // number of wing panels spanwise - u64 nw = 0; // number of wake panels chordwise - u64 nwa = 0; // number of wake panels active chordwise + std::vector params; - f32 s_ref = 0.0f; // reference area - f32 c_ref = 0.0f; // reference chord f32 frame[16] = { 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f - }; // Col major order (TODO: move this to kinematic tracker) - - char* name = nullptr; // mesh name - bool lifting = true; + }; // Col major order (TODO: move this to kinematic tracker?) + + Mesh2(const Allocator& allocator) : + verts_wing_init(allocator), verts_wing(allocator), verts_wake(allocator), + normals(allocator), colloc(allocator), area(allocator) {}; + void alloc_wing() { + const u64 total_verts = params.back().off_wing_v + params.back().nb_panels_wing(); + const u64 total_panels = params.back().off_wing_p + params.back().nb_vertices_wing(); + verts_wing_init.alloc(total_verts*3); + verts_wing.alloc(total_verts*3); + normals.alloc(total_panels*3); + colloc.alloc(total_panels*3); + area.alloc(total_panels); + } + void alloc_wake() { + const u64 total_wake_verts = params.back().off_wake_v + params.back().nb_vertices_wake(); + verts_wake.alloc(total_wake_verts*3); + } }; -inline u64 mesh_nb_panels_wing(const Mesh2* m) {return m->nc * m->ns;} -inline u64 mesh_nb_panels_total(const Mesh2* m) {return (m->nc+m->nw) * m->ns;} -inline u64 mesh_nb_vertices_wing(const Mesh2* m) {return (m->nc+1) * (m->ns+1);} -inline u64 mesh_nb_vertices_total(const Mesh2* m) {return (m->nc+m->nw+1) * (m->ns+1);} +using SurfDims = std::pair; // nc, ns -// Contains only the raw geometry -struct MeshGeom { - f32* vertices = nullptr; - u64 nc = 0; // number of wing panels chordwise - u64 ns = 0; +class MeshFile { +public: + virtual ~MeshFile() = default; + virtual SurfDims get_dims(std::ifstream& file) const = 0; + virtual SurfDims read(std::ifstream& file, f32* vertices) const = 0; }; -void mesh_quarterchord(MeshGeom* mesh_geom); -void mesh_io_read_file(const std::string& filename, MeshGeom* mesh_geom); -void mesh_alloc(const malloc_f malloc, Mesh2* mesh, u64 nc, u64 ns, u64 nw, u64 nwa); -void mesh_free(const free_f free, Mesh2* mesh); -void mesh_copy(const memcpy_f memcpy, Mesh2* dst, const Mesh2* src); - -// // === STRUCTURED MESH === -// // nc : number of panels chordwise -// // ns : number of panels spanwise -// // nw : number of panels in chordwise wake -// // ncw : nc + nw -// class Mesh { -// public: -// // Unstructured members (for exporting results) -// // --------------------- -// // All vertices stored in single SoA for result exporting -// // (stored in span major order) -// SoA_3D_t v; // size (ncw+1)*(ns+1) -// // Offsets for indexing in connectivity array for each panel -// std::vector offsets = {}; // size nc*ns + 1 -// // Panel-vertex connectivity -// // vertices indices of a panel: connectivity[offsets[i]] to connectivity[offsets[i+1]] -// std::vector connectivity = {}; // size 4*(nc*ns) - -// // Panels metrics (stored span major order) -// // --------------------- -// // Collocation points of panels -// SoA_3D_t colloc; // size ncw*ns -// // Normals of panels -// SoA_3D_t normal; // size ncw*ns -// // Area of panels -// std::vector area = {}; // size ncw*ns - -// // Structured dimensions -// // --------------------- -// u64 nc = 1; // number of panels chordwise -// u64 ns = 1; // number of panels spanwise -// u64 nw; // number of wake panels chordwise (max capacity) -// u64 current_nw = 0; // current number of built wake panels (temporary) - -// // Analytical quanities when provided -// // --------------------- -// f32 s_ref = 0.0f; // reference area (of wing) -// f32 c_ref = 0.0f; // reference chord (of wing) - -// linalg::alias::float4x4 frame = linalg::identity; // transformation matrix -// linalg::alias::float3 ref_pt = {0.25f, 0.0f, 0.0f}; // TODO: deprecate - -// void update_wake(const linalg::alias::float3& u_inf); // update wake vertices -// void correction_high_aoa(f32 alpha_rad); // correct collocation points for high aoa -// void create_vortex_panels(); // create true vortex panels -// void compute_connectivity(); // fill offsets, connectivity -// void compute_metrics_wing(); // fill colloc, normal, area -// void compute_metrics_wake(); -// void compute_metrics_i(u64 i); -// void move(const linalg::alias::float4x4& transform, const SoA_3D_t& origin_pos); -// void resize_wake(const u64 nw); - -// u64 nb_panels_wing() const; -// u64 nb_panels_total() const; -// u64 nb_vertices_wing() const; -// u64 nb_vertices_total() const; -// f32 panels_area(const u64 i, const u64 j, const u64 m, const u64 n) const; -// f32 panels_area_xy(const u64 i, const u64 j, const u64 m, const u64 n) const; -// f32 panel_width_y(const u64 i, const u64 j) const; -// f32 panel_length(const u64 i, const u64 j) const; -// f32 strip_width(const u64 j) const; -// f32 chord_length(const u64 j) const; // vertex idx -// f32 chord_mean(const u64 j, const u64 n) const; - -// // i panel vertices coordinates -// linalg::alias::float3 get_v0(u64 i) const; // upper left -// linalg::alias::float3 get_v1(u64 i) const; // upper right -// linalg::alias::float3 get_v2(u64 i) const; // lower right -// linalg::alias::float3 get_v3(u64 i) const; // lower left - -// void io_read(const std::string& filename); -// Mesh(const tiny::Config& cfg); -// Mesh( -// const std::string& filename, -// const u64 nw, -// const f32 s_ref_, -// const f32 c_ref_, -// const linalg::alias::float3& ref_pt_ // todo: deprecate -// ); - -// private: -// void alloc(); // allocate memory for all buffers -// void init(); // called at the end of constructor -// void io_read_plot3d_structured(std::ifstream& f); -// }; - -// // todo, update this to mirror the constructor -// std::unique_ptr create_mesh(const std::string& filename, const u64 nw=1); +class MeshIO { +public: + MeshIO(const std::string& format); + SurfDims get_dims(const std::string& filename) const; + void read(const std::string& filename, f32* vertices) const; + // void write(const std::string& filename, const Mesh* mesh) const; +private: + std::unique_ptr _file; +}; } // namespace vlm \ No newline at end of file diff --git a/vlm/src/vlm.cpp b/vlm/src/vlm.cpp index c4588e0..a6fd658 100644 --- a/vlm/src/vlm.cpp +++ b/vlm/src/vlm.cpp @@ -23,92 +23,123 @@ using namespace vlm; // backend = create_backend(backend_name, *mesh); // } -AeroCoefficients LinearVLM::solve(const FlowData& flow) { - auto init_pos = translation_matrix({ - -100.0f * flow.u_inf*std::cos(flow.alpha), - 0.0f, - -100.0f * flow.u_inf*std::sin(flow.alpha) - }); +VLM::VLM(const std::string& backend_name, const std::string& mesh_name) : Simulation(backend_name, mesh_name), + lhs(backend->allocator), rhs(backend->allocator), gamma(backend->allocator), gamma_prev(backend->allocator), delta_gamma(backend->allocator), local_velocities(backend->allocator) { + mesh.alloc_wake(1); // single wake panel + mesh.verts_wing_init.to_device(); + alloc_buffers(); +}; - backend->reset(); - //backend->update_wake(flow.freestream); - backend->mesh_move(init_pos); - backend->mesh_metrics(flow.alpha); - backend->lhs_assemble(); - backend->set_velocities(flow.freestream); - backend->compute_rhs(); - backend->lu_factor(); - backend->lu_solve(); - backend->compute_delta_gamma(); - return AeroCoefficients{ - backend->compute_coefficient_cl(flow), - backend->compute_coefficient_cd(flow), - backend->compute_coefficient_cm(flow) - }; +void VLM::alloc_buffers() { + lhs.alloc(mesh.nb_panels_wing() * mesh.nb_panels_wing()); + rhs.alloc(mesh.nb_panels_wing()); + gamma.alloc(mesh.nb_panels_total()); + gamma_prev.alloc(mesh.nb_panels_wing()); + delta_gamma.alloc(mesh.nb_panels_wing()); + local_velocities.alloc(mesh.nb_panels_wing() * 3); } -void strip_alpha_to_vel(const FlowData& flow, Mesh2* mesh, f32* local_velocities, f32* strip_alphas) { - const u64 nb_panels_wing = mesh_nb_panels_wing(mesh); - f32* lvx = local_velocities + 0 * nb_panels_wing; - f32* lvy = local_velocities + 1 * nb_panels_wing; - f32* lvz = local_velocities + 2 * nb_panels_wing; - for (u64 j = 0; j < mesh->ns; j++) { - auto fs = compute_freestream(flow.u_inf, strip_alphas[j], 0.0f); - lvx[j] = fs.x; - lvy[j] = fs.y; - lvz[j] = fs.z; - } - for (u64 i = 1; i < mesh->nc; i++) { - std::memcpy(lvx + i * mesh->ns, lvx, mesh->ns * sizeof(*lvx)); - std::memcpy(lvy + i * mesh->ns, lvy, mesh->ns * sizeof(*lvy)); - std::memcpy(lvz + i * mesh->ns, lvz, mesh->ns * sizeof(*lvz)); - } -} +AeroCoefficients VLM::run(const FlowData& flow) { + // Reset buffer state + backend->allocator.d_memset(lhs.d_ptr(), 0, lhs.size()); + backend->allocator.dd_memcpy(mesh.verts_wing.d_ptr(), mesh.verts_wing_init.d_ptr(), mesh.verts_wing.size()); + mesh.nwa = 0; -AeroCoefficients NonLinearVLM::solve(const FlowData& flow, const Database& db) { - f64 err = 1.0f; // l1 error auto init_pos = translation_matrix({ -100.0f * flow.u_inf*std::cos(flow.alpha), 0.0f, -100.0f * flow.u_inf*std::sin(flow.alpha) }); - std::fill(strip_alphas, strip_alphas + backend->hd_mesh->ns, flow.alpha); // memset - - backend->reset(); - backend->mesh_move(init_pos); - backend->mesh_metrics(flow.alpha); - backend->lhs_assemble(); - backend->lu_factor(); - - for (u64 iter = 0; iter < max_iter && err > tol; iter++) { - err = 0.0; // reset l1 error - strip_alpha_to_vel(flow, backend->hd_mesh, velocities, strip_alphas); // Compute local panel velocities based on strip alphas - backend->allocator.hd_memcpy(backend->dd_data->local_velocities, velocities, 3 * mesh_nb_panels_wing(backend->hd_mesh) * sizeof(*velocities)); - backend->compute_rhs(); // Compute RHS using strip alphas (on CPU) - backend->lu_solve(); // Copy RHS on device and solve for gamma - backend->compute_delta_gamma(); // Compute the chordwise delta gammas for force computation + backend->mesh_move(mesh, init_pos); +} + +// AeroCoefficients LinearVLM::solve(const FlowData& flow) { +// auto init_pos = translation_matrix({ +// -100.0f * flow.u_inf*std::cos(flow.alpha), +// 0.0f, +// -100.0f * flow.u_inf*std::sin(flow.alpha) +// }); + +// backend->reset(); +// //backend->update_wake(flow.freestream); +// backend->mesh_move(init_pos); +// backend->mesh_metrics(flow.alpha); +// backend->lhs_assemble(); +// backend->set_velocities(flow.freestream); +// backend->compute_rhs(); +// backend->lu_factor(); +// backend->lu_solve(); +// backend->compute_delta_gamma(); +// return AeroCoefficients{ +// backend->compute_coefficient_cl(flow), +// backend->compute_coefficient_cd(flow), +// backend->compute_coefficient_cm(flow) +// }; +// } + +// void strip_alpha_to_vel(const FlowData& flow, Mesh* mesh, f32* local_velocities, f32* strip_alphas) { +// const u64 nb_panels_wing = mesh_nb_panels_wing(mesh); +// f32* lvx = local_velocities + 0 * nb_panels_wing; +// f32* lvy = local_velocities + 1 * nb_panels_wing; +// f32* lvz = local_velocities + 2 * nb_panels_wing; +// for (u64 j = 0; j < mesh->ns; j++) { +// auto fs = compute_freestream(flow.u_inf, strip_alphas[j], 0.0f); +// lvx[j] = fs.x; +// lvy[j] = fs.y; +// lvz[j] = fs.z; +// } +// for (u64 i = 1; i < mesh->nc; i++) { +// std::memcpy(lvx + i * mesh->ns, lvx, mesh->ns * sizeof(*lvx)); +// std::memcpy(lvy + i * mesh->ns, lvy, mesh->ns * sizeof(*lvy)); +// std::memcpy(lvz + i * mesh->ns, lvz, mesh->ns * sizeof(*lvz)); +// } +// } + +// AeroCoefficients NonLinearVLM::solve(const FlowData& flow, const Database& db) { +// f64 err = 1.0f; // l1 error +// auto init_pos = translation_matrix({ +// -100.0f * flow.u_inf*std::cos(flow.alpha), +// 0.0f, +// -100.0f * flow.u_inf*std::sin(flow.alpha) +// }); + +// std::fill(strip_alphas, strip_alphas + backend->hd_mesh->ns, flow.alpha); // memset + +// backend->reset(); +// backend->mesh_move(init_pos); +// backend->mesh_metrics(flow.alpha); +// backend->lhs_assemble(); +// backend->lu_factor(); + +// for (u64 iter = 0; iter < max_iter && err > tol; iter++) { +// err = 0.0; // reset l1 error +// strip_alpha_to_vel(flow, backend->hd_mesh, velocities, strip_alphas); // Compute local panel velocities based on strip alphas +// backend->allocator.hd_memcpy(backend->dd_data->local_velocities, velocities, 3 * mesh_nb_panels_wing(backend->hd_mesh) * sizeof(*velocities)); +// backend->compute_rhs(); // Compute RHS using strip alphas (on CPU) +// backend->lu_solve(); // Copy RHS on device and solve for gamma +// backend->compute_delta_gamma(); // Compute the chordwise delta gammas for force computation - // Parallel Reduce - // loop over the chordwise strips and apply Van Dam algorithm - for (u64 j = 0; j < backend->hd_mesh->ns; j++) { - const f32 strip_area = backend->mesh_area(0, j, backend->hd_mesh->nc, 1); - const FlowData strip_flow = {strip_alphas[j], flow.beta, flow.u_inf, flow.rho}; - const f32 strip_cl = backend->compute_coefficient_cl(strip_flow, strip_area, j, 1); - const f32 effective_aoa = strip_cl / (2.f*PI_f) - strip_flow.alpha + flow.alpha; - - // TODO: interpolated value should be computed at the y mid point of the strip - const f32 correction = (db.interpolate_CL(effective_aoa, 0.f) - strip_cl) / (2.f*PI_f); - // std::printf(">>> Strip: %d | CL: %.3f | Interpolated: %.3f | Correction: %.3e\n", j, strip_cl, db.interpolate_CL(effective_aoa, 0.f), correction); - strip_alphas[j] += correction; - err += (f64)std::abs(correction); - } - err /= (f64)backend->hd_mesh->ns; // normalize l1 error - //std::printf(">>> Iter: %d | Error: %.3e \n", iter, err); - } - return AeroCoefficients{ - backend->compute_coefficient_cl(flow), - backend->compute_coefficient_cd(flow), - backend->compute_coefficient_cm(flow) - }; -} \ No newline at end of file +// // Parallel Reduce +// // loop over the chordwise strips and apply Van Dam algorithm +// for (u64 j = 0; j < backend->hd_mesh->ns; j++) { +// const f32 strip_area = backend->mesh_area(0, j, backend->hd_mesh->nc, 1); +// const FlowData strip_flow = {strip_alphas[j], flow.beta, flow.u_inf, flow.rho}; +// const f32 strip_cl = backend->compute_coefficient_cl(strip_flow, strip_area, j, 1); +// const f32 effective_aoa = strip_cl / (2.f*PI_f) - strip_flow.alpha + flow.alpha; + +// // TODO: interpolated value should be computed at the y mid point of the strip +// const f32 correction = (db.interpolate_CL(effective_aoa, 0.f) - strip_cl) / (2.f*PI_f); +// // std::printf(">>> Strip: %d | CL: %.3f | Interpolated: %.3f | Correction: %.3e\n", j, strip_cl, db.interpolate_CL(effective_aoa, 0.f), correction); +// strip_alphas[j] += correction; +// err += (f64)std::abs(correction); +// } +// err /= (f64)backend->hd_mesh->ns; // normalize l1 error +// //std::printf(">>> Iter: %d | Error: %.3e \n", iter, err); +// } +// return AeroCoefficients{ +// backend->compute_coefficient_cl(flow), +// backend->compute_coefficient_cd(flow), +// backend->compute_coefficient_cm(flow) +// }; +// } \ No newline at end of file diff --git a/vlm/src/vlm_backend.cpp b/vlm/src/vlm_backend.cpp index cb8fe88..00dc14b 100644 --- a/vlm/src/vlm_backend.cpp +++ b/vlm/src/vlm_backend.cpp @@ -13,32 +13,18 @@ using namespace vlm; -// Here we assume constant shape wing since mesh_sref -// TODO: remove this limitation -f32 Backend::compute_coefficient_cl(const FlowData& flow) { - return compute_coefficient_cl(flow, mesh_area(0,0,hd_mesh->nc, hd_mesh->ns), 0, hd_mesh->ns); -} - -linalg::alias::float3 Backend::compute_coefficient_cm(const FlowData& flow) { - return compute_coefficient_cm(flow, mesh_area(0,0,hd_mesh->nc, hd_mesh->ns), mesh_mac(0,hd_mesh->ns), 0, hd_mesh->ns); -} - -f32 Backend::compute_coefficient_cd(const FlowData& flow) { - return compute_coefficient_cd(flow, mesh_area(0,0,hd_mesh->nc, hd_mesh->ns), 0, hd_mesh->ns); -} - -std::unique_ptr vlm::create_backend(const std::string& backend_name, MeshGeom* mesh, int timesteps) { +std::unique_ptr vlm::create_backend(const std::string& backend_name) { //tiny::CPUID cpuid; //cpuid.print_info(); #ifdef VLM_CPU if (backend_name == "cpu") { - return std::make_unique(mesh, timesteps); + return std::make_unique(); } #endif #ifdef VLM_CUDA if (backend_name == "cuda") { - return std::make_unique(mesh, timesteps); + return std::make_unique(); } #endif throw std::runtime_error("Unsupported backend: " + backend_name); @@ -55,54 +41,9 @@ std::vector vlm::get_available_backends() { return backends; } -void Backend::init(MeshGeom* mesh_geom, u64 timesteps) { - hh_mesh_geom = mesh_geom; // Borrowed ptr - const u64 nb_vertices_wing = (hh_mesh_geom->nc+1)*(hh_mesh_geom->ns+1); - - // Mesh structs allocation - hd_mesh_geom = (MeshGeom*)allocator.h_malloc(sizeof(MeshGeom)); - dd_mesh_geom = (MeshGeom*)allocator.d_malloc(sizeof(MeshGeom)); - // hh_mesh = (Mesh2*)allocator.h_malloc(sizeof(Mesh2)); - hd_mesh = (Mesh2*)allocator.h_malloc(sizeof(Mesh2)); - dd_mesh = (Mesh2*)allocator.d_malloc(sizeof(Mesh2)); - hd_data = (Data*)allocator.h_malloc(sizeof(Data)); - dd_data = (Data*)allocator.d_malloc(sizeof(Data)); - - // Allocate mesh buffers on device and host - hd_mesh_geom->vertices = (f32*) allocator.d_malloc(nb_vertices_wing*3*sizeof(f32)); - // mesh_alloc(allocator.h_malloc, hh_mesh, hh_mesh_geom->nc, hh_mesh_geom->ns, timesteps, 0); - mesh_alloc(allocator.d_malloc, hd_mesh, hh_mesh_geom->nc, hh_mesh_geom->ns, timesteps, 0); - data_alloc(allocator.d_malloc, hd_data, hh_mesh_geom->nc, hh_mesh_geom->ns, timesteps); - - // Copy indices - hd_mesh_geom->nc = hh_mesh_geom->nc; - hd_mesh_geom->ns = hh_mesh_geom->ns; - - // Copy raw vertex geometry directly to device - allocator.hd_memcpy(hd_mesh_geom->vertices, hh_mesh_geom->vertices, nb_vertices_wing*3*sizeof(f32)); - - // Copy host-device mesh ptr to device-device - allocator.hd_memcpy(dd_mesh_geom, hd_mesh_geom, sizeof(*hd_mesh_geom)); - allocator.hd_memcpy(dd_mesh, hd_mesh, sizeof(*hd_mesh)); - allocator.hd_memcpy(dd_data, hd_data, sizeof(*hd_data)); -} - Backend::~Backend() { // Free device-device ptrs - allocator.d_free(dd_data); - allocator.d_free(dd_mesh); - allocator.d_free(dd_mesh_geom); allocator.d_free(d_solver_info); allocator.d_free(d_solver_ipiv); allocator.d_free(d_solver_buffer); - - // Free device buffers store in host-device ptrs - allocator.d_free(hd_mesh_geom->vertices); - mesh_free(allocator.d_free, hd_mesh); - data_free(allocator.d_free, hd_data); - - // Free host-device ptrs - allocator.h_free(hd_data); - allocator.h_free(hd_mesh); - allocator.h_free(hd_mesh_geom); } \ No newline at end of file diff --git a/vlm/src/vlm_mesh.cpp b/vlm/src/vlm_mesh.cpp index 84360d2..cc761c6 100644 --- a/vlm/src/vlm_mesh.cpp +++ b/vlm/src/vlm_mesh.cpp @@ -12,117 +12,10 @@ using namespace vlm; -void vlm::mesh_alloc(const malloc_f malloc, Mesh2* mesh, u64 nc, u64 ns, u64 nw, u64 nwa) { - mesh->vertices = (f32*)malloc((nc+nw+1)*(ns+1)*3*sizeof(f32)); - mesh->normals = (f32*)malloc(nc*ns*3*sizeof(f32)); - mesh->colloc = (f32*)malloc(nc*ns*3*sizeof(f32)); - mesh->area = (f32*)malloc(nc*ns*sizeof(f32)); - mesh->nc = nc; - mesh->ns = ns; - mesh->nw = nw; - mesh->nwa = nwa; -} - -void vlm::mesh_copy(const memcpy_f memcpy, Mesh2* dst, const Mesh2* src) { - memcpy(dst->vertices, src->vertices, (dst->nc+dst->nw+1)*(dst->ns+1)*3*sizeof(f32)); - memcpy(dst->normals, src->normals, dst->nc*dst->ns*3*sizeof(f32)); - memcpy(dst->colloc, src->colloc, dst->nc*dst->ns*3*sizeof(f32)); - memcpy(dst->area, src->area, dst->nc*dst->ns*sizeof(f32)); -} - -void vlm::mesh_free(const free_f free, Mesh2* mesh) { - free(mesh->vertices); - free(mesh->normals); - free(mesh->colloc); - free(mesh->area); -} - -// Reads the Plot3D Structured file, allocates vertex buffer and fills it with the file data -void mesh_io_read_file_plot3d(std::ifstream& f, MeshGeom* mesh_geom) { - std::cout << "reading plot3d mesh" << std::endl; - u64 ni, nj, nk, blocks; - f32 x, y, z; - f >> blocks; - if (blocks != 1) { - throw std::runtime_error("Only single block plot3d mesh is supported"); - } - f >> ni >> nj >> nk; - if (nk != 1) { - throw std::runtime_error("Only 2D plot3d mesh is supported"); - } - - mesh_geom->ns = nj - 1; - mesh_geom->nc = ni - 1; - mesh_geom->vertices = new f32[ni*nj*3]; - const u64 nb_panels = mesh_geom->ns * mesh_geom->nc; - const u64 nb_vertices = ni * nj; - - std::cout << "number of panels: " << nb_panels << std::endl; - std::cout << "ns: " << mesh_geom->ns << std::endl; - std::cout << "nc: " << mesh_geom->nc << std::endl; - - for (u64 j = 0; j < nj; j++) { - for (u64 i = 0; i < ni; i++) { - f >> x; - mesh_geom->vertices[nb_vertices * 0 + nj*i + j] = x; - } - } - for (u64 j = 0; j < nj; j++) { - for (u64 i = 0; i < ni; i++) { - f >> y; - mesh_geom->vertices[nb_vertices * 1 + nj*i + j] = y; - } - } - for (u64 j = 0; j < nj; j++) { - for (u64 i = 0; i < ni; i++) { - f >> z; - mesh_geom->vertices[nb_vertices * 2 + nj*i + j] = z; - } - } - - // TODO: this validation is not robust enough when eventually we will be using multiple bodies - // Validation - // const f32 eps = std::numeric_limits::epsilon(); - // if (std::abs(m.v.x[0]) != eps || std::abs(m.v.y[0]) != eps) { - // throw std::runtime_error("First vertex of plot3d mesh must be at origin"); - // } - const f32 first_y = mesh_geom->vertices[nb_vertices * 1 + 0]; - for (u64 i = 1; i < ni; i++) { - if ( mesh_geom->vertices[nb_vertices * 1 + i*nj] != first_y) { - throw std::runtime_error("Mesh vertices should be ordered in chordwise direction"); - } - } -} - -void vlm::mesh_io_read_file(const std::string& filename, MeshGeom* mesh_geom) { - const std::filesystem::path path(filename); - if (!std::filesystem::exists(path)) { - throw std::runtime_error("Mesh file not found"); // TODO: consider not using exceptions anymore - } - - std::ifstream f(path); - try { - if (f.is_open()) { - if (path.extension() == ".x") { - mesh_io_read_file_plot3d(f, mesh_geom); - } else { - throw std::runtime_error("Only structured gridpro mesh format is supported"); - } - f.close(); - } else { - throw std::runtime_error("Failed to open mesh file"); - } - } catch (const std::exception& e) { - std::cerr << e.what() << std::endl; - } -} - -void vlm::mesh_quarterchord(MeshGeom* mesh_geom) { - const u64 nc = mesh_geom->nc; - const u64 ns = mesh_geom->ns; - f32* vx = mesh_geom->vertices + 0 * (nc+1) * (ns+1); - f32* vy = mesh_geom->vertices + 1 * (nc+1) * (ns+1); - f32* vz = mesh_geom->vertices + 2 * (nc+1) * (ns+1); +void mesh_quarterchord(f32* vertices, u64 nc, u64 ns) { + f32* vx = vertices + 0 * (nc+1) * (ns+1); + f32* vy = vertices + 1 * (nc+1) * (ns+1); + f32* vz = vertices + 2 * (nc+1) * (ns+1); for (u64 i = 0; i < nc; i++) { for (u64 j = 0; j < ns+1; j++) { @@ -147,277 +40,106 @@ void vlm::mesh_quarterchord(MeshGeom* mesh_geom) { } } -// void Mesh::init() { -// create_vortex_panels(); -// if (c_ref == 0.0f) c_ref = chord_mean(0, ns+1); // use MAC as default -// if (s_ref == 0.0f) s_ref = panels_area_xy(0,0, nc, ns); // use projected area as default -// compute_connectivity(); -// compute_metrics_wing(); -// } - -// void Mesh::alloc() { -// const u64 ncw = nc + nw; -// v.resize((ncw + 1) * (ns + 1)); -// offsets.resize(nc * ns + 1); -// connectivity.resize(4 * nc * ns); - -// colloc.resize(ncw * ns); -// normal.resize(ncw * ns); -// area.resize(ncw * ns); -// } - -// u64 Mesh::nb_panels_wing() const { return nc * ns; }; -// u64 Mesh::nb_panels_total() const { return (nc+nw) * ns; }; -// u64 Mesh::nb_vertices_wing() const { return (nc + 1) * (ns + 1); }; -// u64 Mesh::nb_vertices_total() const { return (nc + nw + 1) * (ns + 1); }; - -// /// @brief Computes the mean chord of a set of panels -// /// @details -// /// Mean Aerodynamic Chord = \frac{2}{S} \int_{0}^{b/2} c(y)^{2} dy -// /// Integration using the Trapezoidal Rule -// /// Validated against empirical formulas for tapered wings -// /// @param j first panel index chordwise -// /// @param n number of panels spanwise -// /// @return mean chord of the set of panels -// f32 Mesh::chord_mean(const u64 j, const u64 n) const { -// assert(j + n <= ns+1); // spanwise range -// assert(n > 1); // minimum 2 chords -// f32 mac = 0.0f; -// // loop over panel chordwise sections in spanwise direction -// // Note: can be done optimally with vertical fused simd -// for (u64 v = 0; v < n - 1; v++) { -// const f32 c0 = chord_length(j + v); -// const f32 c1 = chord_length(j + v + 1); -// mac += 0.5f * (c0 * c0 + c1 * c1) * panel_width_y(0, j + v); -// } -// // Since we divide by half the total wing area (both sides) we dont need to multiply by 2 -// return mac / panels_area_xy(0, j, nc, n-1); -// } - -// /// @brief Computes the total area of a 2D set of panels -// /// @param i first panel index chordwise -// /// @param j first panel panel index spanwise -// /// @param m number of panels chordwise -// /// @param n number of panels spanwise -// /// @return sum of the areas of the panels -// f32 Mesh::panels_area(const u64 i, const u64 j, const u64 m, const u64 n) const { -// assert(i + m <= nc); -// assert(j + n <= ns); - -// const u64 ld = ns; -// const f32* areas = &area[j + i * ld]; - -// f32 total_area = 0.0f; -// for (u64 u = 0; u < m; u++) { -// for (u64 v = 0; v < n; v++) { -// total_area += areas[v + u * ld]; -// } -// } -// return total_area; -// } - -// /// @brief Computes the total area of a 2D set of panels projected on the xy plane -// /// @param i first panel index chordwise -// /// @param j first panel panel index spanwise -// /// @param m number of panels chordwise -// /// @param n number of panels spanwise -// /// @return sum of the areas of the panels projected on the xy plane -// f32 Mesh::panels_area_xy(const u64 i, const u64 j, const u64 m, const u64 n) const { -// assert(i + m <= nc); -// assert(j + n <= ns); - -// // Area of a quad: -// // 0.5 * || d1 x d2 || where d1 and d2 are the diagonals of the quad -// f32 total_area = 0.0f; -// // Note: this is highly inefficient as vertex coordinates should be loaded with simd -// for (u64 u = 0; u < m; u++) { -// for (u64 v = 0; v < n; v++) { -// const u64 idx = j + v + (i+u) * ns; -// const linalg::alias::float3 d1 = get_v0(idx) - get_v2(idx); -// const linalg::alias::float3 d2 = get_v1(idx) - get_v3(idx); -// const linalg::alias::float3 cross = linalg::cross(d1, d2); - -// total_area += 0.5f * std::abs(cross.z); -// // std::cout << "area xy: " << 0.5f * std::abs(cross.z()) << "\n"; -// // std::cout << "area: " << area[idx] << "\n"; -// } -// } -// return total_area; -// } - -// f32 Mesh::panel_length(const u64 i, const u64 j) const { -// // Since chordwise segments are always parallel, we can simply measure the width of the first panel -// assert(i < nc); -// assert(j < ns); -// const linalg::alias::float3 v0 = get_v0(j + i * ns); -// const linalg::alias::float3 v1 = get_v1(j + i * ns); -// const linalg::alias::float3 v2 = get_v2(j + i * ns); -// const linalg::alias::float3 v3 = get_v3(j + i * ns); -// return 0.5f * (linalg::length(v3-v0) + linalg::length(v2-v1)); -// } - -// /// @brief Computes the width of a single panel in pure y direction -// /// @param i panel index chordwise -// /// @param j panel index spanwise -// f32 Mesh::panel_width_y(const u64 i, const u64 j) const { -// assert(i < nc); -// assert(j < ns); -// const u64 ld = ns + 1; - -// return v.y[j + 1 + i * ld] - v.y[j + i * ld]; -// } - -// f32 Mesh::strip_width(const u64 j) const { -// assert(j < ns); -// const linalg::alias::float3 v0 = get_v0(j); -// const linalg::alias::float3 v1 = get_v1(j); -// return linalg::length(v1 - v0); -// } - -// /// @brief Computes the chord length of a chordwise segment -// /// @details Since the mesh follows the camber line, the chord length is computed -// /// as the distance between the first and last vertex of a chordwise segment -// /// @param j chordwise segment index -// f32 Mesh::chord_length(const u64 j) const { -// assert(j <= ns); // spanwise vertex range -// const f32 dx = v.x[j + nc * (ns+1)] - v.x[j]; -// const f32 dy = 0.0f; // chordwise segments are parallel to the x axis -// const f32 dz = v.z[j + nc * (ns+1)] - v.z[j]; - -// return std::sqrt(dx * dx + dy * dy + dz * dz); -// } - -// linalg::alias::float3 Mesh::get_v0(u64 i) const { -// const u64 idx = i + i / ns; -// return {v.x[idx], v.y[idx], v.z[idx]}; -// } - -// linalg::alias::float3 Mesh::get_v1(u64 i) const { -// const u64 idx = i + i / ns + 1; -// return {v.x[idx], v.y[idx], v.z[idx]}; -// } - -// linalg::alias::float3 Mesh::get_v2(u64 i) const { -// const u64 idx = i + i / ns + ns + 2; -// return {v.x[idx], v.y[idx], v.z[idx]}; -// } - -// linalg::alias::float3 Mesh::get_v3(u64 i) const { -// const u64 idx = i + i / ns + ns + 1; -// return {v.x[idx], v.y[idx], v.z[idx]}; -// } - -// void Mesh::update_wake(const linalg::alias::float3& freestream) { -// const f32 chord_root = chord_length(0); -// const f32 off_x = freestream.x * 100.0f * chord_root; -// const f32 off_y = freestream.y * 100.0f * chord_root; -// const f32 off_z = freestream.z * 100.0f * chord_root; - -// const u64 v_ns = ns + 1; -// const u64 begin_trailing_edge = nb_vertices_wing()-v_ns; -// const u64 end_trailing_edge = nb_vertices_wing(); -// // Add one layer of wake vertices -// // this can be parallelized (careful to false sharing tho) -// for (u64 i = begin_trailing_edge; i < end_trailing_edge; ++i) { -// v.x[i + v_ns] = v.x[i] + off_x; -// v.y[i + v_ns] = v.y[i] + off_y; -// v.z[i + v_ns] = v.z[i] + off_z; -// } -// compute_metrics_wake(); -// current_nw = 1; -// } - -// // https://publications.polymtl.ca/2555/1/2017_MatthieuParenteau.pdf (Eq 3.4 p21) -// void Mesh::correction_high_aoa(f32 alpha_rad) { -// const f32 factor = 0.5f * alpha_rad / (std::sin(alpha_rad) + EPS); // correction factor -// // Note: this can be vectorized and parallelized -// for (u64 i = 0; i < nb_panels_total(); i++) { -// // "chord vector" from center of leading line (v0-v1) to trailing line (v3-v2) -// const linalg::alias::float3 chord_vec = 0.5f * (get_v2(i) + get_v3(i) - get_v0(i) - get_v1(i)); -// const linalg::alias::float3 colloc_pt = 0.5f * (get_v0(i) + get_v1(i)) + factor * chord_vec; -// // collocation calculated as a translation of the center of leading line center -// colloc.x[i] = colloc_pt.x; -// colloc.y[i] = colloc_pt.y; -// colloc.z[i] = colloc_pt.z; -// } -// } - -// void Mesh::compute_connectivity() { -// // indices of the points forming the quad -// // span(y) -> -// // chord(x) 0--1 -// // | | | -// // \/ 3--2 -// std::array quad = {}; - -// // Note: with some modifications this loop can be parallelized -// for (u64 i = 0; i < nb_panels_wing(); i++) { -// // extra offset occuring from the fact that there is 1 more vertex -// // per row than surfaces -// u64 chord_idx = i / nc; -// quad[0] = i + chord_idx; -// quad[1] = quad[0] + 1; -// quad[2] = quad[1] + ns; -// quad[3] = quad[0] + ns; -// offsets[i+1] = offsets[i] + 4; -// connectivity.insert(connectivity.end(), quad.begin(), quad.end()); -// } -// } - -// void Mesh::compute_metrics_i(u64 i ) { -// const linalg::alias::float3 v0 = get_v0(i); -// const linalg::alias::float3 v1 = get_v1(i); -// const linalg::alias::float3 v2 = get_v2(i); -// const linalg::alias::float3 v3 = get_v3(i); +class Plot3DFile : public MeshFile { +public: + SurfDims get_dims(std::ifstream& f) const override { + u64 ni, nj, nk, blocks; + f >> blocks; + if (blocks != 1) { + throw std::runtime_error("Only single block plot3d mesh is supported"); + } + f >> ni >> nj >> nk; + if (nk != 1) { + throw std::runtime_error("Only 2D plot3d mesh is supported"); + } + return {ni-1, nj-1}; + } -// // Vector v0 from p1 to p3 -// const linalg::alias::float3 vec_0 = v3 - v1; + SurfDims read(std::ifstream& f, f32* vertices) const override { + u64 ni, nj, nk, blocks; + f32 x, y, z; + f >> blocks; + f >> ni >> nj >> nk; + assert(mesh->ns == nj - 1); + assert(mesh->nc == ni - 1); + const u64 nb_vertices = ni * nj; + + for (u64 j = 0; j < nj; j++) { + for (u64 i = 0; i < ni; i++) { + f >> x; + vertices[nb_vertices * 0 + nj*i + j] = x; + } + } + for (u64 j = 0; j < nj; j++) { + for (u64 i = 0; i < ni; i++) { + f >> y; + vertices[nb_vertices * 1 + nj*i + j] = y; + } + } + for (u64 j = 0; j < nj; j++) { + for (u64 i = 0; i < ni; i++) { + f >> z; + vertices[nb_vertices * 2 + nj*i + j] = z; + } + } -// // Vector v1 from p0 to p2 -// const linalg::alias::float3 vec_1 = v2 - v0; + // TODO: this validation is not robust enough when eventually we will be using multiple bodies + // Validation + // const f32 eps = std::numeric_limits::epsilon(); + // if (std::abs(m.v.x[0]) != eps || std::abs(m.v.y[0]) != eps) { + // throw std::runtime_error("First vertex of plot3d mesh must be at origin"); + // } + const f32 first_y =vertices[nb_vertices * 1 + 0]; + for (u64 i = 1; i < ni; i++) { + if ( vertices[nb_vertices * 1 + i*nj] != first_y) { + throw std::runtime_error("Mesh vertices should be ordered in chordwise direction"); + } + } + return {ni-1, nj-1}; + } +}; -// // Normal = v0 x v1 -// const linalg::alias::float3 normal_vec = linalg::normalize(linalg::cross(vec_0, vec_1)); -// normal.x[i] = normal_vec.x; -// normal.y[i] = normal_vec.y; -// normal.z[i] = normal_vec.z; +class MeshIO { +public: + MeshIO(const std::string& format) { + if (format == "plot3d") { + _file = std::make_unique(); + } else { + throw std::runtime_error("Unsupported file format"); + } + } -// // 3 vectors f (P0P3), b (P0P2), e (P0P1) to compute the area: -// // area = 0.5 * (||f x b|| + ||b x e||) -// // this formula might also work: area = || 0.5 * ( f x b + b x e ) || -// const linalg::alias::float3 vec_f = v3 - v0; -// const linalg::alias::float3 vec_b = v2 - v0; -// const linalg::alias::float3 vec_e = v1 - v0; + SurfDims get_dims(const std::string& filename) const { + std::ifstream file(filename); + if (!file.is_open()) { + throw std::runtime_error("Failed to open mesh file"); + } + auto dims = _file->get_dims(file); + std::cout << "Number of panels: " << dims.first * dims.second << "\n"; + std::cout << "nc: " << dims.first << "\n"; + std::cout << "ns: " << dims.second << "\n"; + return dims; + } -// area[i] = 0.5f * (linalg::length(linalg::cross(vec_f, vec_b)) + linalg::length(linalg::cross(vec_b, vec_e))); - -// // collocation point (center of the quad) -// const linalg::alias::float3 colloc_pt = 0.25f * (v0 + v1 + v2 + v3); - -// colloc.x[i] = colloc_pt.x; -// colloc.y[i] = colloc_pt.y; -// colloc.z[i] = colloc_pt.z; -// } + void read(const std::string& filename, f32* vertices) const { + std::ifstream file(filename); + if (!file.is_open()) { + throw std::runtime_error("Failed to open mesh file"); + } + auto [nc, ns] = _file->read(file, vertices); + mesh_quarterchord(vertices, nc, ns); + } -// void Mesh::compute_metrics_wing() { -// for (u64 i = 0; i < nb_panels_wing(); i++) { -// compute_metrics_i(i); -// } -// } +private: + std::unique_ptr _file; +}; -// void Mesh::compute_metrics_wake() { -// for (u64 i = nb_panels_wing(); i < nb_panels_total(); i++) { -// compute_metrics_i(i); -// } -// } +// // TODO: DEPRECATE ALL BELOW -// // plot3d is chordwise major -// void Mesh::io_read_plot3d_structured(std::ifstream& f) { +// // Reads the Plot3D Structured file, allocates vertex buffer and fills it with the file data +// void mesh_io_read_file_plot3d(std::ifstream& f, MeshGeom* mesh_geom) { // std::cout << "reading plot3d mesh" << std::endl; -// u64 ni = 0; // number of vertices chordwise -// u64 nj = 0; // number of vertices spanwise -// u64 nk = 0; -// u64 blocks = 0; +// u64 ni, nj, nk, blocks; // f32 x, y, z; // f >> blocks; // if (blocks != 1) { @@ -428,30 +150,32 @@ void vlm::mesh_quarterchord(MeshGeom* mesh_geom) { // throw std::runtime_error("Only 2D plot3d mesh is supported"); // } -// ns = nj - 1; -// nc = ni - 1; -// alloc(); +// mesh_geom->ns = nj - 1; +// mesh_geom->nc = ni - 1; +// mesh_geom->vertices = new f32[ni*nj*3]; +// const u64 nb_panels = mesh_geom->ns * mesh_geom->nc; +// const u64 nb_vertices = ni * nj; -// std::cout << "number of panels: " << nb_panels_wing() << std::endl; -// std::cout << "ns: " << ns << std::endl; -// std::cout << "nc: " << nc << std::endl; +// std::cout << "number of panels: " << nb_panels << std::endl; +// std::cout << "ns: " << mesh_geom->ns << std::endl; +// std::cout << "nc: " << mesh_geom->nc << std::endl; // for (u64 j = 0; j < nj; j++) { // for (u64 i = 0; i < ni; i++) { // f >> x; -// v.x[nj*i + j] = x; +// mesh_geom->vertices[nb_vertices * 0 + nj*i + j] = x; // } // } // for (u64 j = 0; j < nj; j++) { // for (u64 i = 0; i < ni; i++) { // f >> y; -// v.y[nj*i + j] = y; +// mesh_geom->vertices[nb_vertices * 1 + nj*i + j] = y; // } // } // for (u64 j = 0; j < nj; j++) { // for (u64 i = 0; i < ni; i++) { // f >> z; -// v.z[nj*i + j] = z; +// mesh_geom->vertices[nb_vertices * 2 + nj*i + j] = z; // } // } @@ -461,64 +185,63 @@ void vlm::mesh_quarterchord(MeshGeom* mesh_geom) { // // if (std::abs(m.v.x[0]) != eps || std::abs(m.v.y[0]) != eps) { // // throw std::runtime_error("First vertex of plot3d mesh must be at origin"); // // } -// const f32 first_y = v.y[0]; +// const f32 first_y = mesh_geom->vertices[nb_vertices * 1 + 0]; // for (u64 i = 1; i < ni; i++) { -// if (v.y[i * nj] != first_y) { +// if ( mesh_geom->vertices[nb_vertices * 1 + i*nj] != first_y) { // throw std::runtime_error("Mesh vertices should be ordered in chordwise direction"); // } // } // } -// void Mesh::io_read(const std::string& filename) { +// void vlm::mesh_io_read_file(const std::string& filename, MeshGeom* mesh_geom) { // const std::filesystem::path path(filename); // if (!std::filesystem::exists(path)) { -// throw std::runtime_error("Mesh file not found"); +// throw std::runtime_error("Mesh file not found"); // TODO: consider not using exceptions anymore // } // std::ifstream f(path); -// if (f.is_open()) { -// if (path.extension() == ".x") { -// io_read_plot3d_structured(f); +// try { +// if (f.is_open()) { +// if (path.extension() == ".x") { +// mesh_io_read_file_plot3d(f, mesh_geom); +// } else { +// throw std::runtime_error("Only structured gridpro mesh format is supported"); +// } +// f.close(); // } else { -// throw std::runtime_error("Only structured gridpro mesh format is supported"); +// throw std::runtime_error("Failed to open mesh file"); // } -// f.close(); -// } else { -// throw std::runtime_error("Failed to open mesh file"); +// } catch (const std::exception& e) { +// std::cerr << e.what() << std::endl; // } // } -// void Mesh::move(const linalg::alias::float4x4& transform, const SoA_3D_t& origin_pos) { -// // const tiny::ScopedTimer t("Mesh::move"); -// assert(current_nw < nw); // check if we have capacity - -// // Shed wake before moving -// const u64 src_begin = nb_vertices_wing() - (ns + 1); -// const u64 src_end = nb_vertices_wing(); -// const u64 dst_begin = (nc + nw - current_nw) * (ns + 1); -// std::copy(v.x.data() + src_begin, v.x.data() + src_end, v.x.data() + dst_begin); -// std::copy(v.y.data() + src_begin, v.y.data() + src_end, v.y.data() + dst_begin); -// std::copy(v.z.data() + src_begin, v.z.data() + src_end, v.z.data() + dst_begin); - -// // Perform the movement -// for (u64 i = 0; i < nb_vertices_wing(); i++) { -// const linalg::alias::float4 transformed_pt = linalg::mul(transform, linalg::alias::float4{origin_pos.x[i], origin_pos.y[i], origin_pos.z[i], 1.f}); -// v.x[i] = transformed_pt.x; -// v.y[i] = transformed_pt.y; -// v.z[i] = transformed_pt.z; -// } - -// compute_metrics_wing(); // compute the new wing panel metrics +// void vlm::mesh_quarterchord(MeshGeom* mesh_geom) { +// const u64 nc = mesh_geom->nc; +// const u64 ns = mesh_geom->ns; +// f32* vx = mesh_geom->vertices + 0 * (nc+1) * (ns+1); +// f32* vy = mesh_geom->vertices + 1 * (nc+1) * (ns+1); +// f32* vz = mesh_geom->vertices + 2 * (nc+1) * (ns+1); -// // Copy new trailing edge vertices on the wake buffer -// std::copy(v.x.data() + src_begin, v.x.data() + src_end, v.x.data() + dst_begin - (ns + 1)); -// std::copy(v.y.data() + src_begin, v.y.data() + src_end, v.y.data() + dst_begin - (ns + 1)); -// std::copy(v.z.data() + src_begin, v.z.data() + src_end, v.z.data() + dst_begin - (ns + 1)); +// for (u64 i = 0; i < nc; i++) { +// for (u64 j = 0; j < ns+1; j++) { +// const u64 v0 = (i+0) * (ns+1) + j; +// const u64 v3 = (i+1) * (ns+1) + j; -// current_nw++; -// } +// vx[v0] = 0.75f * vx[v0] + 0.25f * vx[v3]; +// vy[v0] = 0.75f * vy[v0] + 0.25f * vy[v3]; +// vz[v0] = 0.75f * vz[v0] + 0.25f * vz[v3]; +// } +// } -// void Mesh::resize_wake(const u64 nw_) { -// nw = nw_; -// alloc(); // resizes the buffers +// // Trailing edge vertices +// const u64 i = nc-1; +// for (u64 j = 0; j < ns+1; j++) { +// const u64 v0 = (i+0) * (ns+1) + j; +// const u64 v3 = (i+1) * (ns+1) + j; + +// vx[v3] = (4.f/3.f) * vx[v3] - (1.f/3.f) * vx[v0]; +// vy[v3] = (4.f/3.f) * vy[v3] - (1.f/3.f) * vy[v0]; +// vz[v3] = (4.f/3.f) * vz[v3] - (1.f/3.f) * vz[v0]; +// } // }