From 06a83c168412567c4532a160c49fb4559d5d0df6 Mon Sep 17 00:00:00 2001 From: Samuel Ayala Date: Mon, 10 Jun 2024 21:22:47 -0400 Subject: [PATCH] wip sync --- vlm/backends/cpu/include/vlm_backend_cpu.hpp | 5 +- vlm/backends/cpu/src/vlm_backend_cpu.cpp | 146 +++- .../cpu/src/vlm_backend_cpu_kernels.ispc | 62 +- .../cuda/include/vlm_backend_cuda.hpp | 14 +- vlm/include/vlm_allocator.hpp | 6 +- vlm/include/vlm_backend.hpp | 16 +- vlm/include/vlm_data.hpp | 22 +- vlm/include/vlm_mesh.hpp | 179 ++-- vlm/src/vlm_backend.cpp | 50 +- vlm/src/vlm_data.cpp | 26 +- vlm/src/vlm_mesh.cpp | 782 +++++++++--------- 11 files changed, 707 insertions(+), 601 deletions(-) diff --git a/vlm/backends/cpu/include/vlm_backend_cpu.hpp b/vlm/backends/cpu/include/vlm_backend_cpu.hpp index b66435a..2f74569 100644 --- a/vlm/backends/cpu/include/vlm_backend_cpu.hpp +++ b/vlm/backends/cpu/include/vlm_backend_cpu.hpp @@ -7,7 +7,7 @@ namespace vlm { class BackendCPU : public Backend { public: - BackendCPU(); + BackendCPU(MeshGeom* mesh, u64 timesteps); ~BackendCPU(); void reset() override; void lhs_assemble() override; @@ -24,6 +24,9 @@ class BackendCPU : public Backend { void compute_delta_gamma() override; void set_velocities(const linalg::alias::float3& vel) override; void set_velocities(const SoA_3D_t& vels) override; + private: + f32 mesh_mac(u64 j, u64 n) override; // mean chord + f32 mesh_area(const u64 i, const u64 j, const u64 m, const u64 n) override; // mean span }; } // namespace vlm \ No newline at end of file diff --git a/vlm/backends/cpu/src/vlm_backend_cpu.cpp b/vlm/backends/cpu/src/vlm_backend_cpu.cpp index 361c8c0..c6fd580 100644 --- a/vlm/backends/cpu/src/vlm_backend_cpu.cpp +++ b/vlm/backends/cpu/src/vlm_backend_cpu.cpp @@ -20,49 +20,42 @@ #include using namespace vlm; - -template -void print_buffer(const T* start, u64 size) { - std::cout << "["; - for (u64 i = 0; i < size; i++) { - std::cout << start[i] << ","; - } - std::cout << "]\n"; -} - using namespace linalg::ostream_overloads; BackendCPU::~BackendCPU() = default; // Destructor definition -// TODO: replace any kind of size arithmetic with methods -BackendCPU::BackendCPU(Mesh& mesh) : Backend(mesh) { - lhs.resize(mesh.nb_panels_wing() * mesh.nb_panels_wing()); - - rollup_vertices.resize(mesh.nb_vertices_total()); // TODO: exclude the wing vertices - local_velocities.resize(mesh.nb_panels_wing()); - rhs.resize(mesh.nb_panels_wing()); - ipiv.resize(mesh.nb_panels_wing()); - gamma.resize((mesh.nc + mesh.nw) * mesh.ns); // store wake gamma as well - gamma_prev.resize(mesh.nb_panels_wing()); - delta_gamma.resize(mesh.nb_panels_wing()); - trefftz_buffer.resize(mesh.ns); +BackendCPU::BackendCPU(MeshGeom* mesh, u64 timesteps) { + allocator.h_malloc = std::malloc; + allocator.d_malloc = std::malloc; + allocator.h_free = std::free; + allocator.d_free = std::free; + allocator.hh_memcpy = std::memcpy; + allocator.hd_memcpy = std::memcpy; + allocator.dh_memcpy = std::memcpy; + allocator.dd_memcpy = std::memcpy; + allocator.h_memset = std::memset; + allocator.d_memset = std::memset; + init(mesh, timesteps); } void BackendCPU::reset() { // TODO: verify that these are needed - std::fill(gamma.begin(), gamma.end(), 0.0f); - std::fill(lhs.begin(), lhs.end(), 0.0f); - std::fill(rhs.begin(), rhs.end(), 0.0f); + // std::fill(gamma.begin(), gamma.end(), 0.0f); + // std::fill(lhs.begin(), lhs.end(), 0.0f); + // std::fill(rhs.begin(), rhs.end(), 0.0f); } void BackendCPU::compute_delta_gamma() { + const u64 nc = hd_mesh->nc; + const u64 ns = hd_mesh->ns; + const u64 nw = hd_mesh->nw; // const tiny::ScopedTimer timer("Delta Gamma"); - std::copy(gamma.data(), gamma.data()+mesh.ns, delta_gamma.data()); + std::copy(dd_data->gamma, dd_data->gamma+ns, dd_data->delta_gamma); // note: this is efficient as the memory is contiguous - for (u64 i = 1; i < mesh.nc; i++) { - for (u64 j = 0; j < mesh.ns; j++) { - delta_gamma[i*mesh.ns + j] = gamma[i*mesh.ns + j] - gamma[(i-1)*mesh.ns + j]; + for (u64 i = 1; i < nc; i++) { + for (u64 j = 0; j < ns; j++) { + dd_data->delta_gamma[i*ns + j] = dd_data->gamma[i*ns + j] - dd_data->gamma[(i-1)*ns + j]; } } } @@ -114,10 +107,16 @@ void BackendCPU::lhs_assemble() { void BackendCPU::compute_rhs() { const tiny::ScopedTimer timer("RHS"); - const Mesh& m = mesh; - - for (u64 i = 0; i < m.nb_panels_wing(); i++) { - rhs[i] = - (local_velocities.x[i] * m.normal.x[i] + local_velocities.y[i] * m.normal.y[i] + local_velocities.z[i] * m.normal.z[i]); + const u64 nc = hd_mesh->nc; + const u64 ns = hd_mesh->ns; + const u64 nw = hd_mesh->nw; + const u64 nb_panels_wing = nc * ns; + + for (u64 i = 0; i < nb_panels_wing(); i++) { + rhs[i] = - ( + dd_data->local_velocities[i + 0 * nb_panels_wing] * dd_mesh->normal[i + 0 * nb_panels_wing] + + dd_data->local_velocities[i + 1 * nb_panels_wing] * dd_mesh->normal[i + 1 * nb_panels_wing] + + dd_data->local_velocities[i + 2 * nb_panels_wing] * dd_mesh->normal[i + 2 * nb_panels_wing]); } } @@ -361,3 +360,84 @@ void BackendCPU::set_velocities(const SoA_3D_t& vels) { std::copy(vels.y.begin(), vels.y.end(), local_velocities.y.begin()); std::copy(vels.z.begin(), vels.z.end(), local_velocities.z.begin()); } + +/// @brief Computes the chord length of a chordwise segment +/// @details Since the mesh follows the camber line, the chord length is computed +/// as the distance between the first and last vertex of a chordwise segment +/// @param j chordwise segment index +f32 Mesh::chord_length(const u64 j) const { + const f32 dx = v.x[j + nc * (ns+1)] - v.x[j]; + const f32 dy = 0.0f; // chordwise segments are parallel to the x axis + const f32 dz = v.z[j + nc * (ns+1)] - v.z[j]; + + return std::sqrt(dx * dx + dy * dy + dz * dz); +} + +// i: chordwise index, j: spanwise index, k: x,y,z dim +#define PTR_MESH_V(m, i,j,k) (m->vertices + j + i * (m->ns+1) + k * (m->nc+m->nw+1) * (m->ns+1)) + +/// @brief Computes the mean chord of a set of panels +/// @details +/// Mean Aerodynamic Chord = \frac{2}{S} \int_{0}^{b/2} c(y)^{2} dy +/// Integration using the Trapezoidal Rule +/// Validated against empirical formulas for tapered wings +/// @param j first panel index spanwise +/// @param n number of panels spanwise +/// @return mean chord of the set of panels +f32 BackendCPU::mesh_mac(u64 j, u64 n) { + assert(j + n <= hd_mesh->ns); // spanwise range + assert(n > 0); + u64 nc = hd_mesh->nc; + u64 ns = hd_mesh->ns; + u64 nw = hd_mesh->nw; + + // Leading edge vertex + f32* lvx = PTR_MESH_V(hd_mesh, 0,0,0); + f32* lvy = PTR_MESH_V(hd_mesh, 0,0,1); + f32* lvz = PTR_MESH_V(hd_mesh, 0,0,2); + // Trailing edge vertex + f32* tvx = PTR_MESH_V(hd_mesh, nc,0,0); + f32* tvy = PTR_MESH_V(hd_mesh, nc,0,1); + f32* tvz = PTR_MESH_V(hd_mesh, nc,0,2); + + f32 mac = 0.0f; + // loop over panel chordwise sections in spanwise direction + // Note: can be done optimally with vertical fused simd + for (u64 v = j; v < j+n; v++) { + // left and right chord lengths + const f32 dx0 = tvx[v] - lvx[v]; + const f32 dy0 = tvy[v] - lvy[v]; + const f32 dz0 = tvz[v] - lvz[v]; + const f32 dx1 = tvx[v + 1] - lvx[v + 1]; + const f32 dy1 = tvy[v + 1] - lvy[v + 1]; + const f32 dz1 = tvz[v + 1] - lvz[v + 1]; + const f32 c0 = std::sqrt(dx0 * dx0 + dy0 * dy0 + dz0 * dz0); + const f32 c1 = std::sqrt(dx1 * dx1 + dy1 * dy1 + dz1 * dz1); + // Panel width + const f32 dx3 = lvx[v + 1] - lvx[v]; + const f32 dy3 = lvy[v + 1] - lvy[v]; + const f32 dz3 = lvz[v + 1] - lvz[v]; + const f32 width = std::sqrt(dx3 * dx3 + dy3 * dy3 + dz3 * dz3); + + mac += 0.5f * (c0 * c0 + c1 * c1) * width; + } + // Since we divide by half the total wing area (both sides) we dont need to multiply by 2 + return mac / mesh_area(0, j, nc, n); +} + +f32 BackendCPU::mesh_area(const u64 i, const u64 j, const u64 m, const u64 n) { + assert(i + m <= nc); + assert(j + n <= ns); + u64 nc = hd_mesh->nc; + u64 ns = hd_mesh->ns; + u64 nw = hd_mesh->nw; + + const f32* areas = hd_mesh->area + j + i * (m->ns); + f32 area_sum = 0.0f; + for (u64 u = 0; u < m; u++) { + for (u64 v = 0; v < n; v++) { + area_sum += areas[v + u * ns]; + } + } + return area_sum; +} diff --git a/vlm/backends/cpu/src/vlm_backend_cpu_kernels.ispc b/vlm/backends/cpu/src/vlm_backend_cpu_kernels.ispc index 9a1b5bd..5f989af 100644 --- a/vlm/backends/cpu/src/vlm_backend_cpu_kernels.ispc +++ b/vlm/backends/cpu/src/vlm_backend_cpu_kernels.ispc @@ -27,30 +27,29 @@ inline uniform float length2(const uniform float3& v) {return LENGTH2(v);} inline float length(const float3& v) {return LENGTH(v);} inline uniform float length(const uniform float3& v) {return LENGTH(v);} -// Aggregate Structures -export struct SoA3D { - uniform float* uniform x; - uniform float* uniform y; - uniform float* uniform z; -}; - -export struct MeshProxy { - uniform uint64 ns; - uniform uint64 nc; - uniform uint64 nb_panels; - uniform SoA3D v; // vertices - uniform SoA3D colloc; // collocation points - uniform SoA3D normal; // normals -}; - -export struct MeshView { - uniform uint64 ni; // wing chord panels - uniform uint64 nj; // wing span panels - uniform uint64 nw; // wake chord capacity - uniform uint64 cw; // number of active wake panels - uniform SoA3D v; // vertices - uniform SoA3D colloc; // collocation points - uniform SoA3D normal; // normals +struct Mesh2 { + f32* vertices = nullptr; // (nc+nw+1)*(ns+1)*3 + // TODO: evaluate if we really need those values or we can just compute on the fly in the kernels + f32* normals = nullptr; // nc*ns*3 + f32* colloc = nullptr; // nc*ns*3 + f32* area = nullptr; // nc*ns + + u64 nc = 0; // number of wing panels chordwise + u64 ns = 0; // number of wing panels spanwise + u64 nw = 0; // number of wake panels chordwise + u64 nwa = 0; // number of wake panels active chordwise + + f32 s_ref = 0.0f; // reference area + f32 c_ref = 0.0f; // reference chord + f32 frame[16] = { + 1.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 1.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 1.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f + }; // Col major order (TODO: move this to kinematic tracker) + + char* name = nullptr; // mesh name + bool lifting = true; }; // Bio-savart Kernel @@ -58,6 +57,7 @@ export struct MeshView { #define RCUT2 1e-5f #define PI_f 3.141593f +#define PTR_MESH_V(m, i,j,k) (m->vertices + j + i * (m->ns+1) + k * (m->nc+m->nw+1) * (m->ns+1)) template inline float3 kernel_biosavart(C& colloc, const V& vertex1, const V& vertex2, const uniform float& sigma) { @@ -95,7 +95,7 @@ inline void kernel_symmetry(float3& inf, C colloc, const V& vertex0, const V& ve } export void kernel_influence( - uniform const MeshProxy& m, + uniform const Mesh2* uniform m, uniform float* uniform lhs, uniform uint64 ia, uniform uint64 lidx, uniform float sigma ) { @@ -105,12 +105,14 @@ export void kernel_influence( const uniform uint64 v3 = v0 + m.ns+1; const uniform uint64 v2 = v3 + 1; - // Broadcast vertices - uniform float3 vertex0 = {m.v.x[v0], m.v.y[v0], m.v.z[v0]}; - uniform float3 vertex1 = {m.v.x[v1], m.v.y[v1], m.v.z[v1]}; - uniform float3 vertex2 = {m.v.x[v2], m.v.y[v2], m.v.z[v2]}; - uniform float3 vertex3 = {m.v.x[v3], m.v.y[v3], m.v.z[v3]}; + const uniform float* uniform vx = PTR_MESH_V(m, 0, 0, 0); + const uniform float* uniform vy = PTR_MESH_V(m, 0, 0, 1); + const uniform float* uniform vz = PTR_MESH_V(m, 0, 0, 2); + uniform float3 vertex0 = {vx[v0], vy[v0], vz[v0]}; + uniform float3 vertex1 = {vx[v1], vy[v1], vz[v1]}; + uniform float3 vertex2 = {vx[v2], vy[v2], vz[v2]}; + uniform float3 vertex3 = {vx[v3], vy[v3], vz[v3]}; // print("Influencer %: \n", lidx); // print("Vertex 0: % % %\n", vertex0.x, vertex0.y, vertex0.z); diff --git a/vlm/backends/cuda/include/vlm_backend_cuda.hpp b/vlm/backends/cuda/include/vlm_backend_cuda.hpp index e088698..3fd040d 100644 --- a/vlm/backends/cuda/include/vlm_backend_cuda.hpp +++ b/vlm/backends/cuda/include/vlm_backend_cuda.hpp @@ -25,21 +25,9 @@ struct MeshProxy { class BackendCUDA : public Backend { public: - BackendCPU default_backend; // temporary - float* d_lhs = nullptr; - float* d_rhs = nullptr; - float* d_gamma = nullptr; - float* d_delta_gamma = nullptr; - int* d_solver_info = nullptr; - int* d_solver_ipiv = nullptr; - float* d_solver_buffer = nullptr; - - MeshProxy h_mesh; - MeshProxy* d_mesh; - - BackendCUDA(Mesh& mesh); + BackendCUDA(MeshGeom* mesh, u64 timesteps); ~BackendCUDA(); void reset() override; diff --git a/vlm/include/vlm_allocator.hpp b/vlm/include/vlm_allocator.hpp index ae72014..3ac1e88 100644 --- a/vlm/include/vlm_allocator.hpp +++ b/vlm/include/vlm_allocator.hpp @@ -2,10 +2,10 @@ namespace vlm { -typedef void* (*malloc_f)(unsigned long long size); +typedef void* (*malloc_f)(size_t size); typedef void (*free_f)(void* ptr); -typedef void* (*memcpy_f)(void* dst, const void* src, unsigned long long size); -typedef void* (*memset_f)(void* dst, int value, unsigned long long size); +typedef void* (*memcpy_f)(void* dst, const void* src, size_t size); +typedef void* (*memset_f)(void* dst, int value, size_t size); struct Allocator { malloc_f h_malloc; diff --git a/vlm/include/vlm_backend.hpp b/vlm/include/vlm_backend.hpp index 148316b..adf9a18 100644 --- a/vlm/include/vlm_backend.hpp +++ b/vlm/include/vlm_backend.hpp @@ -18,16 +18,21 @@ class Backend { MeshGeom* dd_mesh_geom; // Mutable meshes (temporal state) - Mesh2* hh_mesh; // host ptr to host buffers for io + // Mesh2* hh_mesh; // host ptr to host buffers for io Mesh2* hd_mesh; // host ptr to device buffers Mesh2* dd_mesh; // device ptr to device buffers for kernels Data* hd_data; Data* dd_data; + i32* d_solver_info = nullptr; + i32* d_solver_ipiv = nullptr; + f32* d_solver_buffer = nullptr; + f32 sigma_vatistas = 0.0f; - Backend(MeshGeom* mesh_geom); - void init(u64 timesteps); + Backend() = default; + ~Backend(); + void init(MeshGeom* mesh_geom, u64 timesteps); // Acts as delayed constructor virtual void reset() = 0; virtual void lhs_assemble() = 0; virtual void compute_rhs() = 0; @@ -46,7 +51,10 @@ class Backend { virtual void compute_delta_gamma() = 0; virtual void set_velocities(const linalg::alias::float3& vel) = 0; virtual void set_velocities(const SoA_3D_t& vels) = 0; - virtual ~Backend() = default; + + private: + virtual f32 mesh_mac(u64 j, u64 n) = 0; // mean chord + virtual f32 mesh_area(const u64 i, const u64 j, const u64 m, const u64 n) = 0; // mean span }; std::unique_ptr create_backend(const std::string& backend_name, const MeshGeom* mesh, int timesteps); diff --git a/vlm/include/vlm_data.hpp b/vlm/include/vlm_data.hpp index fcd07cf..0f8b33b 100644 --- a/vlm/include/vlm_data.hpp +++ b/vlm/include/vlm_data.hpp @@ -2,25 +2,23 @@ #include "vlm_types.hpp" #include "vlm_fwd.hpp" +#include "vlm_allocator.hpp" #include "tinyinterpolate.hpp" namespace vlm { struct Data { - f32* lhs = nullptr; - f32* rhs = nullptr; - f32* gamma = nullptr; - f32* delta_gamma = nullptr; - f32* rollup_vertices = nullptr; - f32* local_velocities = nullptr; - f32* trefftz_buffer = nullptr; - - i32* solver_info = nullptr; - i32* solver_ipiv = nullptr; - f32* solver_buffer = nullptr; + f32* lhs = nullptr; // (ns*nc)^2 + f32* rhs = nullptr; // ns*nc + f32* gamma = nullptr; // (nc+nw)*nw + f32* delta_gamma = nullptr; // nc*ns + f32* rollup_vertices = nullptr; // (nc+nw+1)*(ns+1)*3 + f32* local_velocities = nullptr; // ns*nc*3 + f32* trefftz_buffer = nullptr; // ns TODO: can we get rid of this ?? }; -void data_alloc(const Allocator* allocator, Data* data, u64 n, u64 m); +void data_alloc(const malloc_f malloc, Data* data, u64 nc, u64 ns, u64 nw); +void data_free(const free_f free, Data* data); // Flow characteristics class FlowData { diff --git a/vlm/include/vlm_mesh.hpp b/vlm/include/vlm_mesh.hpp index b996911..0c8a422 100644 --- a/vlm/include/vlm_mesh.hpp +++ b/vlm/include/vlm_mesh.hpp @@ -23,7 +23,12 @@ struct Mesh2 { f32 s_ref = 0.0f; // reference area f32 c_ref = 0.0f; // reference chord - f32 frame[16] = {}; // 4x4 homogeneous matrix in col major order + f32 frame[16] = { + 1.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 1.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 1.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f + }; // Col major order (TODO: move this to kinematic tracker) char* name = nullptr; // mesh name bool lifting = true; @@ -41,93 +46,93 @@ void mesh_alloc(const malloc_f malloc, Mesh2* mesh, u64 nc, u64 ns, u64 nw, u64 void mesh_free(const free_f free, Mesh2* mesh); void mesh_copy(const memcpy_f memcpy, Mesh2* dst, const Mesh2* src); -// === STRUCTURED MESH === -// nc : number of panels chordwise -// ns : number of panels spanwise -// nw : number of panels in chordwise wake -// ncw : nc + nw -class Mesh { - public: - // Unstructured members (for exporting results) - // --------------------- - // All vertices stored in single SoA for result exporting - // (stored in span major order) - SoA_3D_t v; // size (ncw+1)*(ns+1) - // Offsets for indexing in connectivity array for each panel - std::vector offsets = {}; // size nc*ns + 1 - // Panel-vertex connectivity - // vertices indices of a panel: connectivity[offsets[i]] to connectivity[offsets[i+1]] - std::vector connectivity = {}; // size 4*(nc*ns) - - // Panels metrics (stored span major order) - // --------------------- - // Collocation points of panels - SoA_3D_t colloc; // size ncw*ns - // Normals of panels - SoA_3D_t normal; // size ncw*ns - // Area of panels - std::vector area = {}; // size ncw*ns - - // Structured dimensions - // --------------------- - u64 nc = 1; // number of panels chordwise - u64 ns = 1; // number of panels spanwise - u64 nw; // number of wake panels chordwise (max capacity) - u64 current_nw = 0; // current number of built wake panels (temporary) - - // Analytical quanities when provided - // --------------------- - f32 s_ref = 0.0f; // reference area (of wing) - f32 c_ref = 0.0f; // reference chord (of wing) - - linalg::alias::float4x4 frame = linalg::identity; // transformation matrix - linalg::alias::float3 ref_pt = {0.25f, 0.0f, 0.0f}; // TODO: deprecate - - void update_wake(const linalg::alias::float3& u_inf); // update wake vertices - void correction_high_aoa(f32 alpha_rad); // correct collocation points for high aoa - void create_vortex_panels(); // create true vortex panels - void compute_connectivity(); // fill offsets, connectivity - void compute_metrics_wing(); // fill colloc, normal, area - void compute_metrics_wake(); - void compute_metrics_i(u64 i); - void move(const linalg::alias::float4x4& transform, const SoA_3D_t& origin_pos); - void resize_wake(const u64 nw); +// // === STRUCTURED MESH === +// // nc : number of panels chordwise +// // ns : number of panels spanwise +// // nw : number of panels in chordwise wake +// // ncw : nc + nw +// class Mesh { +// public: +// // Unstructured members (for exporting results) +// // --------------------- +// // All vertices stored in single SoA for result exporting +// // (stored in span major order) +// SoA_3D_t v; // size (ncw+1)*(ns+1) +// // Offsets for indexing in connectivity array for each panel +// std::vector offsets = {}; // size nc*ns + 1 +// // Panel-vertex connectivity +// // vertices indices of a panel: connectivity[offsets[i]] to connectivity[offsets[i+1]] +// std::vector connectivity = {}; // size 4*(nc*ns) + +// // Panels metrics (stored span major order) +// // --------------------- +// // Collocation points of panels +// SoA_3D_t colloc; // size ncw*ns +// // Normals of panels +// SoA_3D_t normal; // size ncw*ns +// // Area of panels +// std::vector area = {}; // size ncw*ns + +// // Structured dimensions +// // --------------------- +// u64 nc = 1; // number of panels chordwise +// u64 ns = 1; // number of panels spanwise +// u64 nw; // number of wake panels chordwise (max capacity) +// u64 current_nw = 0; // current number of built wake panels (temporary) + +// // Analytical quanities when provided +// // --------------------- +// f32 s_ref = 0.0f; // reference area (of wing) +// f32 c_ref = 0.0f; // reference chord (of wing) + +// linalg::alias::float4x4 frame = linalg::identity; // transformation matrix +// linalg::alias::float3 ref_pt = {0.25f, 0.0f, 0.0f}; // TODO: deprecate + +// void update_wake(const linalg::alias::float3& u_inf); // update wake vertices +// void correction_high_aoa(f32 alpha_rad); // correct collocation points for high aoa +// void create_vortex_panels(); // create true vortex panels +// void compute_connectivity(); // fill offsets, connectivity +// void compute_metrics_wing(); // fill colloc, normal, area +// void compute_metrics_wake(); +// void compute_metrics_i(u64 i); +// void move(const linalg::alias::float4x4& transform, const SoA_3D_t& origin_pos); +// void resize_wake(const u64 nw); - u64 nb_panels_wing() const; - u64 nb_panels_total() const; - u64 nb_vertices_wing() const; - u64 nb_vertices_total() const; - f32 panels_area(const u64 i, const u64 j, const u64 m, const u64 n) const; - f32 panels_area_xy(const u64 i, const u64 j, const u64 m, const u64 n) const; - f32 panel_width_y(const u64 i, const u64 j) const; - f32 panel_length(const u64 i, const u64 j) const; - f32 strip_width(const u64 j) const; - f32 chord_length(const u64 j) const; // vertex idx - f32 chord_mean(const u64 j, const u64 n) const; - - // i panel vertices coordinates - linalg::alias::float3 get_v0(u64 i) const; // upper left - linalg::alias::float3 get_v1(u64 i) const; // upper right - linalg::alias::float3 get_v2(u64 i) const; // lower right - linalg::alias::float3 get_v3(u64 i) const; // lower left - - void io_read(const std::string& filename); - Mesh(const tiny::Config& cfg); - Mesh( - const std::string& filename, - const u64 nw, - const f32 s_ref_, - const f32 c_ref_, - const linalg::alias::float3& ref_pt_ // todo: deprecate - ); +// u64 nb_panels_wing() const; +// u64 nb_panels_total() const; +// u64 nb_vertices_wing() const; +// u64 nb_vertices_total() const; +// f32 panels_area(const u64 i, const u64 j, const u64 m, const u64 n) const; +// f32 panels_area_xy(const u64 i, const u64 j, const u64 m, const u64 n) const; +// f32 panel_width_y(const u64 i, const u64 j) const; +// f32 panel_length(const u64 i, const u64 j) const; +// f32 strip_width(const u64 j) const; +// f32 chord_length(const u64 j) const; // vertex idx +// f32 chord_mean(const u64 j, const u64 n) const; + +// // i panel vertices coordinates +// linalg::alias::float3 get_v0(u64 i) const; // upper left +// linalg::alias::float3 get_v1(u64 i) const; // upper right +// linalg::alias::float3 get_v2(u64 i) const; // lower right +// linalg::alias::float3 get_v3(u64 i) const; // lower left + +// void io_read(const std::string& filename); +// Mesh(const tiny::Config& cfg); +// Mesh( +// const std::string& filename, +// const u64 nw, +// const f32 s_ref_, +// const f32 c_ref_, +// const linalg::alias::float3& ref_pt_ // todo: deprecate +// ); - private: - void alloc(); // allocate memory for all buffers - void init(); // called at the end of constructor - void io_read_plot3d_structured(std::ifstream& f); -}; - -// todo, update this to mirror the constructor -std::unique_ptr create_mesh(const std::string& filename, const u64 nw=1); +// private: +// void alloc(); // allocate memory for all buffers +// void init(); // called at the end of constructor +// void io_read_plot3d_structured(std::ifstream& f); +// }; + +// // todo, update this to mirror the constructor +// std::unique_ptr create_mesh(const std::string& filename, const u64 nw=1); } // namespace vlm \ No newline at end of file diff --git a/vlm/src/vlm_backend.cpp b/vlm/src/vlm_backend.cpp index 033df5c..b924127 100644 --- a/vlm/src/vlm_backend.cpp +++ b/vlm/src/vlm_backend.cpp @@ -13,30 +13,32 @@ using namespace vlm; +// Here we assume constant shape wing since mesh_sref +// TODO: remove this limitation f32 Backend::compute_coefficient_cl(const FlowData& flow) { - return compute_coefficient_cl(flow, mesh.s_ref, 0, mesh.ns); + return compute_coefficient_cl(flow, mesh_area(0,0,hd_mesh->nc, hd_mesh->ns), 0, hd_mesh->ns); } linalg::alias::float3 Backend::compute_coefficient_cm(const FlowData& flow) { - return compute_coefficient_cm(flow, mesh.s_ref, mesh.c_ref, 0, mesh.ns); + return compute_coefficient_cm(flow, mesh_area(0,0,hd_mesh->nc, hd_mesh->ns), mesh_mac(0,hd_mesh->ns), 0, hd_mesh->ns); } f32 Backend::compute_coefficient_cd(const FlowData& flow) { - return compute_coefficient_cd(flow, mesh.s_ref, 0, mesh.ns); + return compute_coefficient_cd(flow, mesh_area(0,0,hd_mesh->nc, hd_mesh->ns), 0, hd_mesh->ns); } -std::unique_ptr vlm::create_backend(const std::string& backend_name, Mesh& mesh) { +std::unique_ptr vlm::create_backend(const std::string& backend_name, MeshGeom* mesh, u64 timesteps) { //tiny::CPUID cpuid; //cpuid.print_info(); #ifdef VLM_CPU if (backend_name == "cpu") { - return std::make_unique(mesh); + return std::make_unique(mesh, timesteps); } #endif #ifdef VLM_CUDA if (backend_name == "cuda") { - return std::make_unique(mesh); + return std::make_unique(mesh, timesteps); } #endif throw std::runtime_error("Unsupported backend: " + backend_name); @@ -53,29 +55,28 @@ std::vector vlm::get_available_backends() { return backends; } -Backend::Backend(MeshGeom* mesh_geom) : hh_mesh_geom(mesh_geom) {}; - -void Backend::init(u64 timesteps) { - u64 nb_vertices_wing = (hh_mesh_geom->nc+1)*(hh_mesh_geom->ns+1); +void Backend::init(MeshGeom* mesh_geom, u64 timesteps) { + hh_mesh_geom = mesh_geom; // Borrowed ptr + const u64 nb_vertices_wing = (hh_mesh_geom->nc+1)*(hh_mesh_geom->ns+1); // Mesh structs allocation hd_mesh_geom = (MeshGeom*)allocator.h_malloc(sizeof(MeshGeom)); dd_mesh_geom = (MeshGeom*)allocator.d_malloc(sizeof(MeshGeom)); - hh_mesh = (Mesh2*)allocator.h_malloc(sizeof(Mesh2)); + // hh_mesh = (Mesh2*)allocator.h_malloc(sizeof(Mesh2)); hd_mesh = (Mesh2*)allocator.h_malloc(sizeof(Mesh2)); dd_mesh = (Mesh2*)allocator.d_malloc(sizeof(Mesh2)); + hd_data = (Data*)allocator.h_malloc(sizeof(Data)); + dd_data = (Data*)allocator.d_malloc(sizeof(Data)); // Allocate mesh buffers on device and host hd_mesh_geom->vertices = (f32*) allocator.d_malloc(nb_vertices_wing*3*sizeof(f32)); // mesh_alloc(allocator.h_malloc, hh_mesh, hh_mesh_geom->nc, hh_mesh_geom->ns, timesteps, 0); mesh_alloc(allocator.d_malloc, hd_mesh, hh_mesh_geom->nc, hh_mesh_geom->ns, timesteps, 0); + data_alloc(allocator.d_malloc, hd_data, hh_mesh_geom->nc, hh_mesh_geom->ns, timesteps); // Copy indices hd_mesh_geom->nc = hh_mesh_geom->nc; hd_mesh_geom->ns = hh_mesh_geom->ns; - hd_mesh->nc = hh_mesh_geom->nc; - hd_mesh->ns = hh_mesh_geom->ns; - hd_mesh->nw = timesteps; // Copy raw vertex geometry directly to device allocator.hd_memcpy(hd_mesh_geom->vertices, hh_mesh_geom->vertices, nb_vertices_wing*3*sizeof(f32)); @@ -84,4 +85,25 @@ void Backend::init(u64 timesteps) { // Copy host-device mesh ptr to device-device allocator.hd_memcpy(dd_mesh_geom, hd_mesh_geom, sizeof(*hd_mesh_geom)); allocator.hd_memcpy(dd_mesh, hd_mesh, sizeof(*hd_mesh)); + allocator.hd_memcpy(dd_data, hd_data, sizeof(*hd_data)); +} + +Backend::~Backend() { + // Free device-device ptrs + allocator.d_free(dd_data); + allocator.d_free(dd_mesh); + allocator.d_free(dd_mesh_geom); + allocator.d_free(d_solver_info); + allocator.d_free(d_solver_ipiv); + allocator.d_free(d_solver_buffer); + + // Free device buffers store in host-device ptrs + allocator.d_free(hd_mesh_geom->vertices); + mesh_free(allocator.d_free, hd_mesh); + data_free(allocator.d_free, hd_data); + + // Free host-device ptrs + allocator.h_free(hd_data); + allocator.h_free(hd_mesh); + allocator.h_free(hd_mesh_geom); } \ No newline at end of file diff --git a/vlm/src/vlm_data.cpp b/vlm/src/vlm_data.cpp index 68c120f..1fc2353 100644 --- a/vlm/src/vlm_data.cpp +++ b/vlm/src/vlm_data.cpp @@ -20,4 +20,28 @@ FlowData::FlowData(const linalg::alias::float3& freestream_, const f32 rho_): freestream(freestream_), lift_axis(compute_lift_axis(freestream)), stream_axis(compute_stream_axis(freestream)) { -} \ No newline at end of file +} + +void vlm::data_alloc(const malloc_f malloc, Data* data, u64 nc, u64 ns, u64 nw) { + const u64 nb_panels_wing = nc * ns; + const u64 nb_panels_total = (nc+nw) * ns; + const u64 nb_vertices_wing = (nc+1) * (ns+1); + const u64 nb_vertices_total = (nc+nw+1) * (ns+1); + data->lhs = (f32*)malloc(nb_panels_wing * nb_panels_wing * sizeof(f32)); + data->rhs = (f32*)malloc(nb_panels_wing * sizeof(f32)); + data->gamma = (f32*)malloc(nb_panels_total * sizeof(f32)); + data->delta_gamma = (f32*)malloc(nb_panels_wing * sizeof(f32)); + data->rollup_vertices = (f32*)malloc(nb_vertices_total * 3 * sizeof(f32)); // TODO: this can be reduced to (nw+1)*(ns+1)*3 + data->local_velocities = (f32*)malloc(nb_panels_wing * 3 * sizeof(f32)); + data->trefftz_buffer = (f32*)malloc(ns * sizeof(f32)); +} + +void data_free(const free_f free, Data* data) { + free(data->lhs); + free(data->rhs); + free(data->gamma); + free(data->delta_gamma); + free(data->rollup_vertices); + free(data->local_velocities); + free(data->trefftz_buffer); +} diff --git a/vlm/src/vlm_mesh.cpp b/vlm/src/vlm_mesh.cpp index 9d44386..dce0cfc 100644 --- a/vlm/src/vlm_mesh.cpp +++ b/vlm/src/vlm_mesh.cpp @@ -114,45 +114,21 @@ void mesh_io_read_file(const std::string& filename, MeshGeom* mesh_geom) { } } -// TODO: deprecate everything below this line -Mesh::Mesh(const tiny::Config& cfg) : - Mesh( - cfg().section("files").get("mesh"), - cfg().section("mesh").get("nw", 1), - cfg().section("mesh").get("s_ref", 0.0f), - cfg().section("mesh").get("c_ref", 0.0f), - linalg::alias::float3{cfg().section("mesh").get>("ref_pt").data()} - ) { -} - -Mesh::Mesh( - const std::string& filename, - const u64 nw_, - const f32 s_ref_ = 0.0f, - const f32 c_ref_ = 0.0f, - const linalg::alias::float3& ref_pt_ = {0.25f, 0.0f, 0.0f} - ) { - nw = nw_; - s_ref = s_ref_; - c_ref = c_ref_; - ref_pt = ref_pt_; - io_read(filename); - init(); -} - -std::unique_ptr vlm::create_mesh(const std::string& filename, const u64 nw) { - return std::make_unique(filename, nw); -} +void mesh_quaterchord(MeshGeom* mesh_geom) { + const u64 nc = mesh_geom->nc; + const u64 ns = mesh_geom->ns; + f32* vx = mesh_geom->vertices + 0 * (nc+1) * (ns+1); + f32* vy = mesh_geom->vertices + 1 * (nc+1) * (ns+1); + f32* vz = mesh_geom->vertices + 2 * (nc+1) * (ns+1); -void Mesh::create_vortex_panels() { for (u64 i = 0; i < nc; i++) { for (u64 j = 0; j < ns+1; j++) { const u64 v0 = (i+0) * (ns+1) + j; const u64 v3 = (i+1) * (ns+1) + j; - v.x[v0] = 0.75f * v.x[v0] + 0.25f * v.x[v3]; - v.y[v0] = 0.75f * v.y[v0] + 0.25f * v.y[v3]; - v.z[v0] = 0.75f * v.z[v0] + 0.25f * v.z[v3]; + vx[v0] = 0.75f * vx[v0] + 0.25f * vx[v3]; + vy[v0] = 0.75f * vy[v0] + 0.25f * vy[v3]; + vz[v0] = 0.75f * vz[v0] + 0.25f * vz[v3]; } } @@ -162,384 +138,384 @@ void Mesh::create_vortex_panels() { const u64 v0 = (i+0) * (ns+1) + j; const u64 v3 = (i+1) * (ns+1) + j; - v.x[v3] = 1.25f * v.x[v3] - 0.25f * v.x[v0]; - v.y[v3] = 1.25f * v.y[v3] - 0.25f * v.y[v0]; - v.z[v3] = 1.25f * v.z[v3] - 0.25f * v.z[v0]; - } -} - -void Mesh::init() { - create_vortex_panels(); - if (c_ref == 0.0f) c_ref = chord_mean(0, ns+1); // use MAC as default - if (s_ref == 0.0f) s_ref = panels_area_xy(0,0, nc, ns); // use projected area as default - compute_connectivity(); - compute_metrics_wing(); -} - -void Mesh::alloc() { - const u64 ncw = nc + nw; - v.resize((ncw + 1) * (ns + 1)); - offsets.resize(nc * ns + 1); - connectivity.resize(4 * nc * ns); - - colloc.resize(ncw * ns); - normal.resize(ncw * ns); - area.resize(ncw * ns); -} - -u64 Mesh::nb_panels_wing() const { return nc * ns; }; -u64 Mesh::nb_panels_total() const { return (nc+nw) * ns; }; -u64 Mesh::nb_vertices_wing() const { return (nc + 1) * (ns + 1); }; -u64 Mesh::nb_vertices_total() const { return (nc + nw + 1) * (ns + 1); }; - -/// @brief Computes the mean chord of a set of panels -/// @details -/// Mean Aerodynamic Chord = \frac{2}{S} \int_{0}^{b/2} c(y)^{2} dy -/// Integration using the Trapezoidal Rule -/// Validated against empirical formulas for tapered wings -/// @param j first panel index chordwise -/// @param n number of panels spanwise -/// @return mean chord of the set of panels -f32 Mesh::chord_mean(const u64 j, const u64 n) const { - assert(j + n <= ns+1); // spanwise range - assert(n > 1); // minimum 2 chords - f32 mac = 0.0f; - // loop over panel chordwise sections in spanwise direction - // Note: can be done optimally with vertical fused simd - for (u64 v = 0; v < n - 1; v++) { - const f32 c0 = chord_length(j + v); - const f32 c1 = chord_length(j + v + 1); - mac += 0.5f * (c0 * c0 + c1 * c1) * panel_width_y(0, j + v); + vx[v3] = 1.25f * vx[v3] - 0.25f * vx[v0]; + vy[v3] = 1.25f * vy[v3] - 0.25f * vy[v0]; + vz[v3] = 1.25f * vz[v3] - 0.25f * vz[v0]; } - // Since we divide by half the total wing area (both sides) we dont need to multiply by 2 - return mac / panels_area_xy(0, j, nc, n-1); } -/// @brief Computes the total area of a 2D set of panels -/// @param i first panel index chordwise -/// @param j first panel panel index spanwise -/// @param m number of panels chordwise -/// @param n number of panels spanwise -/// @return sum of the areas of the panels -f32 Mesh::panels_area(const u64 i, const u64 j, const u64 m, const u64 n) const { - assert(i + m <= nc); - assert(j + n <= ns); +// void Mesh::init() { +// create_vortex_panels(); +// if (c_ref == 0.0f) c_ref = chord_mean(0, ns+1); // use MAC as default +// if (s_ref == 0.0f) s_ref = panels_area_xy(0,0, nc, ns); // use projected area as default +// compute_connectivity(); +// compute_metrics_wing(); +// } + +// void Mesh::alloc() { +// const u64 ncw = nc + nw; +// v.resize((ncw + 1) * (ns + 1)); +// offsets.resize(nc * ns + 1); +// connectivity.resize(4 * nc * ns); + +// colloc.resize(ncw * ns); +// normal.resize(ncw * ns); +// area.resize(ncw * ns); +// } + +// u64 Mesh::nb_panels_wing() const { return nc * ns; }; +// u64 Mesh::nb_panels_total() const { return (nc+nw) * ns; }; +// u64 Mesh::nb_vertices_wing() const { return (nc + 1) * (ns + 1); }; +// u64 Mesh::nb_vertices_total() const { return (nc + nw + 1) * (ns + 1); }; + +// /// @brief Computes the mean chord of a set of panels +// /// @details +// /// Mean Aerodynamic Chord = \frac{2}{S} \int_{0}^{b/2} c(y)^{2} dy +// /// Integration using the Trapezoidal Rule +// /// Validated against empirical formulas for tapered wings +// /// @param j first panel index chordwise +// /// @param n number of panels spanwise +// /// @return mean chord of the set of panels +// f32 Mesh::chord_mean(const u64 j, const u64 n) const { +// assert(j + n <= ns+1); // spanwise range +// assert(n > 1); // minimum 2 chords +// f32 mac = 0.0f; +// // loop over panel chordwise sections in spanwise direction +// // Note: can be done optimally with vertical fused simd +// for (u64 v = 0; v < n - 1; v++) { +// const f32 c0 = chord_length(j + v); +// const f32 c1 = chord_length(j + v + 1); +// mac += 0.5f * (c0 * c0 + c1 * c1) * panel_width_y(0, j + v); +// } +// // Since we divide by half the total wing area (both sides) we dont need to multiply by 2 +// return mac / panels_area_xy(0, j, nc, n-1); +// } + +// /// @brief Computes the total area of a 2D set of panels +// /// @param i first panel index chordwise +// /// @param j first panel panel index spanwise +// /// @param m number of panels chordwise +// /// @param n number of panels spanwise +// /// @return sum of the areas of the panels +// f32 Mesh::panels_area(const u64 i, const u64 j, const u64 m, const u64 n) const { +// assert(i + m <= nc); +// assert(j + n <= ns); - const u64 ld = ns; - const f32* areas = &area[j + i * ld]; - - f32 total_area = 0.0f; - for (u64 u = 0; u < m; u++) { - for (u64 v = 0; v < n; v++) { - total_area += areas[v + u * ld]; - } - } - return total_area; -} - -/// @brief Computes the total area of a 2D set of panels projected on the xy plane -/// @param i first panel index chordwise -/// @param j first panel panel index spanwise -/// @param m number of panels chordwise -/// @param n number of panels spanwise -/// @return sum of the areas of the panels projected on the xy plane -f32 Mesh::panels_area_xy(const u64 i, const u64 j, const u64 m, const u64 n) const { - assert(i + m <= nc); - assert(j + n <= ns); +// const u64 ld = ns; +// const f32* areas = &area[j + i * ld]; + +// f32 total_area = 0.0f; +// for (u64 u = 0; u < m; u++) { +// for (u64 v = 0; v < n; v++) { +// total_area += areas[v + u * ld]; +// } +// } +// return total_area; +// } + +// /// @brief Computes the total area of a 2D set of panels projected on the xy plane +// /// @param i first panel index chordwise +// /// @param j first panel panel index spanwise +// /// @param m number of panels chordwise +// /// @param n number of panels spanwise +// /// @return sum of the areas of the panels projected on the xy plane +// f32 Mesh::panels_area_xy(const u64 i, const u64 j, const u64 m, const u64 n) const { +// assert(i + m <= nc); +// assert(j + n <= ns); - // Area of a quad: - // 0.5 * || d1 x d2 || where d1 and d2 are the diagonals of the quad - f32 total_area = 0.0f; - // Note: this is highly inefficient as vertex coordinates should be loaded with simd - for (u64 u = 0; u < m; u++) { - for (u64 v = 0; v < n; v++) { - const u64 idx = j + v + (i+u) * ns; - const linalg::alias::float3 d1 = get_v0(idx) - get_v2(idx); - const linalg::alias::float3 d2 = get_v1(idx) - get_v3(idx); - const linalg::alias::float3 cross = linalg::cross(d1, d2); +// // Area of a quad: +// // 0.5 * || d1 x d2 || where d1 and d2 are the diagonals of the quad +// f32 total_area = 0.0f; +// // Note: this is highly inefficient as vertex coordinates should be loaded with simd +// for (u64 u = 0; u < m; u++) { +// for (u64 v = 0; v < n; v++) { +// const u64 idx = j + v + (i+u) * ns; +// const linalg::alias::float3 d1 = get_v0(idx) - get_v2(idx); +// const linalg::alias::float3 d2 = get_v1(idx) - get_v3(idx); +// const linalg::alias::float3 cross = linalg::cross(d1, d2); - total_area += 0.5f * std::abs(cross.z); - // std::cout << "area xy: " << 0.5f * std::abs(cross.z()) << "\n"; - // std::cout << "area: " << area[idx] << "\n"; - } - } - return total_area; -} - -f32 Mesh::panel_length(const u64 i, const u64 j) const { - // Since chordwise segments are always parallel, we can simply measure the width of the first panel - assert(i < nc); - assert(j < ns); - const linalg::alias::float3 v0 = get_v0(j + i * ns); - const linalg::alias::float3 v1 = get_v1(j + i * ns); - const linalg::alias::float3 v2 = get_v2(j + i * ns); - const linalg::alias::float3 v3 = get_v3(j + i * ns); - return 0.5f * (linalg::length(v3-v0) + linalg::length(v2-v1)); -} - -/// @brief Computes the width of a single panel in pure y direction -/// @param i panel index chordwise -/// @param j panel index spanwise -f32 Mesh::panel_width_y(const u64 i, const u64 j) const { - assert(i < nc); - assert(j < ns); - const u64 ld = ns + 1; - - return v.y[j + 1 + i * ld] - v.y[j + i * ld]; -} - -f32 Mesh::strip_width(const u64 j) const { - assert(j < ns); - const linalg::alias::float3 v0 = get_v0(j); - const linalg::alias::float3 v1 = get_v1(j); - return linalg::length(v1 - v0); -} - -/// @brief Computes the chord length of a chordwise segment -/// @details Since the mesh follows the camber line, the chord length is computed -/// as the distance between the first and last vertex of a chordwise segment -/// @param j chordwise segment index -f32 Mesh::chord_length(const u64 j) const { - assert(j <= ns); // spanwise vertex range - const f32 dx = v.x[j + nc * (ns+1)] - v.x[j]; - const f32 dy = 0.0f; // chordwise segments are parallel to the x axis - const f32 dz = v.z[j + nc * (ns+1)] - v.z[j]; - - return std::sqrt(dx * dx + dy * dy + dz * dz); -} - -linalg::alias::float3 Mesh::get_v0(u64 i) const { - const u64 idx = i + i / ns; - return {v.x[idx], v.y[idx], v.z[idx]}; -} - -linalg::alias::float3 Mesh::get_v1(u64 i) const { - const u64 idx = i + i / ns + 1; - return {v.x[idx], v.y[idx], v.z[idx]}; -} - -linalg::alias::float3 Mesh::get_v2(u64 i) const { - const u64 idx = i + i / ns + ns + 2; - return {v.x[idx], v.y[idx], v.z[idx]}; -} - -linalg::alias::float3 Mesh::get_v3(u64 i) const { - const u64 idx = i + i / ns + ns + 1; - return {v.x[idx], v.y[idx], v.z[idx]}; -} - -void Mesh::update_wake(const linalg::alias::float3& freestream) { - const f32 chord_root = chord_length(0); - const f32 off_x = freestream.x * 100.0f * chord_root; - const f32 off_y = freestream.y * 100.0f * chord_root; - const f32 off_z = freestream.z * 100.0f * chord_root; - - const u64 v_ns = ns + 1; - const u64 begin_trailing_edge = nb_vertices_wing()-v_ns; - const u64 end_trailing_edge = nb_vertices_wing(); - // Add one layer of wake vertices - // this can be parallelized (careful to false sharing tho) - for (u64 i = begin_trailing_edge; i < end_trailing_edge; ++i) { - v.x[i + v_ns] = v.x[i] + off_x; - v.y[i + v_ns] = v.y[i] + off_y; - v.z[i + v_ns] = v.z[i] + off_z; - } - compute_metrics_wake(); - current_nw = 1; -} - -// https://publications.polymtl.ca/2555/1/2017_MatthieuParenteau.pdf (Eq 3.4 p21) -void Mesh::correction_high_aoa(f32 alpha_rad) { - const f32 factor = 0.5f * alpha_rad / (std::sin(alpha_rad) + EPS); // correction factor - // Note: this can be vectorized and parallelized - for (u64 i = 0; i < nb_panels_total(); i++) { - // "chord vector" from center of leading line (v0-v1) to trailing line (v3-v2) - const linalg::alias::float3 chord_vec = 0.5f * (get_v2(i) + get_v3(i) - get_v0(i) - get_v1(i)); - const linalg::alias::float3 colloc_pt = 0.5f * (get_v0(i) + get_v1(i)) + factor * chord_vec; - // collocation calculated as a translation of the center of leading line center - colloc.x[i] = colloc_pt.x; - colloc.y[i] = colloc_pt.y; - colloc.z[i] = colloc_pt.z; - } -} - -void Mesh::compute_connectivity() { - // indices of the points forming the quad - // span(y) -> - // chord(x) 0--1 - // | | | - // \/ 3--2 - std::array quad = {}; - - // Note: with some modifications this loop can be parallelized - for (u64 i = 0; i < nb_panels_wing(); i++) { - // extra offset occuring from the fact that there is 1 more vertex - // per row than surfaces - u64 chord_idx = i / nc; - quad[0] = i + chord_idx; - quad[1] = quad[0] + 1; - quad[2] = quad[1] + ns; - quad[3] = quad[0] + ns; - offsets[i+1] = offsets[i] + 4; - connectivity.insert(connectivity.end(), quad.begin(), quad.end()); - } -} - -void Mesh::compute_metrics_i(u64 i ) { - const linalg::alias::float3 v0 = get_v0(i); - const linalg::alias::float3 v1 = get_v1(i); - const linalg::alias::float3 v2 = get_v2(i); - const linalg::alias::float3 v3 = get_v3(i); - - // Vector v0 from p1 to p3 - const linalg::alias::float3 vec_0 = v3 - v1; - - // Vector v1 from p0 to p2 - const linalg::alias::float3 vec_1 = v2 - v0; - - // Normal = v0 x v1 - const linalg::alias::float3 normal_vec = linalg::normalize(linalg::cross(vec_0, vec_1)); - normal.x[i] = normal_vec.x; - normal.y[i] = normal_vec.y; - normal.z[i] = normal_vec.z; - - // 3 vectors f (P0P3), b (P0P2), e (P0P1) to compute the area: - // area = 0.5 * (||f x b|| + ||b x e||) - // this formula might also work: area = || 0.5 * ( f x b + b x e ) || - const linalg::alias::float3 vec_f = v3 - v0; - const linalg::alias::float3 vec_b = v2 - v0; - const linalg::alias::float3 vec_e = v1 - v0; - - area[i] = 0.5f * (linalg::length(linalg::cross(vec_f, vec_b)) + linalg::length(linalg::cross(vec_b, vec_e))); +// total_area += 0.5f * std::abs(cross.z); +// // std::cout << "area xy: " << 0.5f * std::abs(cross.z()) << "\n"; +// // std::cout << "area: " << area[idx] << "\n"; +// } +// } +// return total_area; +// } + +// f32 Mesh::panel_length(const u64 i, const u64 j) const { +// // Since chordwise segments are always parallel, we can simply measure the width of the first panel +// assert(i < nc); +// assert(j < ns); +// const linalg::alias::float3 v0 = get_v0(j + i * ns); +// const linalg::alias::float3 v1 = get_v1(j + i * ns); +// const linalg::alias::float3 v2 = get_v2(j + i * ns); +// const linalg::alias::float3 v3 = get_v3(j + i * ns); +// return 0.5f * (linalg::length(v3-v0) + linalg::length(v2-v1)); +// } + +// /// @brief Computes the width of a single panel in pure y direction +// /// @param i panel index chordwise +// /// @param j panel index spanwise +// f32 Mesh::panel_width_y(const u64 i, const u64 j) const { +// assert(i < nc); +// assert(j < ns); +// const u64 ld = ns + 1; + +// return v.y[j + 1 + i * ld] - v.y[j + i * ld]; +// } + +// f32 Mesh::strip_width(const u64 j) const { +// assert(j < ns); +// const linalg::alias::float3 v0 = get_v0(j); +// const linalg::alias::float3 v1 = get_v1(j); +// return linalg::length(v1 - v0); +// } + +// /// @brief Computes the chord length of a chordwise segment +// /// @details Since the mesh follows the camber line, the chord length is computed +// /// as the distance between the first and last vertex of a chordwise segment +// /// @param j chordwise segment index +// f32 Mesh::chord_length(const u64 j) const { +// assert(j <= ns); // spanwise vertex range +// const f32 dx = v.x[j + nc * (ns+1)] - v.x[j]; +// const f32 dy = 0.0f; // chordwise segments are parallel to the x axis +// const f32 dz = v.z[j + nc * (ns+1)] - v.z[j]; + +// return std::sqrt(dx * dx + dy * dy + dz * dz); +// } + +// linalg::alias::float3 Mesh::get_v0(u64 i) const { +// const u64 idx = i + i / ns; +// return {v.x[idx], v.y[idx], v.z[idx]}; +// } + +// linalg::alias::float3 Mesh::get_v1(u64 i) const { +// const u64 idx = i + i / ns + 1; +// return {v.x[idx], v.y[idx], v.z[idx]}; +// } + +// linalg::alias::float3 Mesh::get_v2(u64 i) const { +// const u64 idx = i + i / ns + ns + 2; +// return {v.x[idx], v.y[idx], v.z[idx]}; +// } + +// linalg::alias::float3 Mesh::get_v3(u64 i) const { +// const u64 idx = i + i / ns + ns + 1; +// return {v.x[idx], v.y[idx], v.z[idx]}; +// } + +// void Mesh::update_wake(const linalg::alias::float3& freestream) { +// const f32 chord_root = chord_length(0); +// const f32 off_x = freestream.x * 100.0f * chord_root; +// const f32 off_y = freestream.y * 100.0f * chord_root; +// const f32 off_z = freestream.z * 100.0f * chord_root; + +// const u64 v_ns = ns + 1; +// const u64 begin_trailing_edge = nb_vertices_wing()-v_ns; +// const u64 end_trailing_edge = nb_vertices_wing(); +// // Add one layer of wake vertices +// // this can be parallelized (careful to false sharing tho) +// for (u64 i = begin_trailing_edge; i < end_trailing_edge; ++i) { +// v.x[i + v_ns] = v.x[i] + off_x; +// v.y[i + v_ns] = v.y[i] + off_y; +// v.z[i + v_ns] = v.z[i] + off_z; +// } +// compute_metrics_wake(); +// current_nw = 1; +// } + +// // https://publications.polymtl.ca/2555/1/2017_MatthieuParenteau.pdf (Eq 3.4 p21) +// void Mesh::correction_high_aoa(f32 alpha_rad) { +// const f32 factor = 0.5f * alpha_rad / (std::sin(alpha_rad) + EPS); // correction factor +// // Note: this can be vectorized and parallelized +// for (u64 i = 0; i < nb_panels_total(); i++) { +// // "chord vector" from center of leading line (v0-v1) to trailing line (v3-v2) +// const linalg::alias::float3 chord_vec = 0.5f * (get_v2(i) + get_v3(i) - get_v0(i) - get_v1(i)); +// const linalg::alias::float3 colloc_pt = 0.5f * (get_v0(i) + get_v1(i)) + factor * chord_vec; +// // collocation calculated as a translation of the center of leading line center +// colloc.x[i] = colloc_pt.x; +// colloc.y[i] = colloc_pt.y; +// colloc.z[i] = colloc_pt.z; +// } +// } + +// void Mesh::compute_connectivity() { +// // indices of the points forming the quad +// // span(y) -> +// // chord(x) 0--1 +// // | | | +// // \/ 3--2 +// std::array quad = {}; + +// // Note: with some modifications this loop can be parallelized +// for (u64 i = 0; i < nb_panels_wing(); i++) { +// // extra offset occuring from the fact that there is 1 more vertex +// // per row than surfaces +// u64 chord_idx = i / nc; +// quad[0] = i + chord_idx; +// quad[1] = quad[0] + 1; +// quad[2] = quad[1] + ns; +// quad[3] = quad[0] + ns; +// offsets[i+1] = offsets[i] + 4; +// connectivity.insert(connectivity.end(), quad.begin(), quad.end()); +// } +// } + +// void Mesh::compute_metrics_i(u64 i ) { +// const linalg::alias::float3 v0 = get_v0(i); +// const linalg::alias::float3 v1 = get_v1(i); +// const linalg::alias::float3 v2 = get_v2(i); +// const linalg::alias::float3 v3 = get_v3(i); + +// // Vector v0 from p1 to p3 +// const linalg::alias::float3 vec_0 = v3 - v1; + +// // Vector v1 from p0 to p2 +// const linalg::alias::float3 vec_1 = v2 - v0; + +// // Normal = v0 x v1 +// const linalg::alias::float3 normal_vec = linalg::normalize(linalg::cross(vec_0, vec_1)); +// normal.x[i] = normal_vec.x; +// normal.y[i] = normal_vec.y; +// normal.z[i] = normal_vec.z; + +// // 3 vectors f (P0P3), b (P0P2), e (P0P1) to compute the area: +// // area = 0.5 * (||f x b|| + ||b x e||) +// // this formula might also work: area = || 0.5 * ( f x b + b x e ) || +// const linalg::alias::float3 vec_f = v3 - v0; +// const linalg::alias::float3 vec_b = v2 - v0; +// const linalg::alias::float3 vec_e = v1 - v0; + +// area[i] = 0.5f * (linalg::length(linalg::cross(vec_f, vec_b)) + linalg::length(linalg::cross(vec_b, vec_e))); - // collocation point (center of the quad) - const linalg::alias::float3 colloc_pt = 0.25f * (v0 + v1 + v2 + v3); +// // collocation point (center of the quad) +// const linalg::alias::float3 colloc_pt = 0.25f * (v0 + v1 + v2 + v3); - colloc.x[i] = colloc_pt.x; - colloc.y[i] = colloc_pt.y; - colloc.z[i] = colloc_pt.z; -} - -void Mesh::compute_metrics_wing() { - for (u64 i = 0; i < nb_panels_wing(); i++) { - compute_metrics_i(i); - } -} - -void Mesh::compute_metrics_wake() { - for (u64 i = nb_panels_wing(); i < nb_panels_total(); i++) { - compute_metrics_i(i); - } -} - -// plot3d is chordwise major -void Mesh::io_read_plot3d_structured(std::ifstream& f) { - std::cout << "reading plot3d mesh" << std::endl; - u64 ni = 0; // number of vertices chordwise - u64 nj = 0; // number of vertices spanwise - u64 nk = 0; - u64 blocks = 0; - f32 x, y, z; - f >> blocks; - if (blocks != 1) { - throw std::runtime_error("Only single block plot3d mesh is supported"); - } - f >> ni >> nj >> nk; - if (nk != 1) { - throw std::runtime_error("Only 2D plot3d mesh is supported"); - } - - ns = nj - 1; - nc = ni - 1; - alloc(); - - std::cout << "number of panels: " << nb_panels_wing() << std::endl; - std::cout << "ns: " << ns << std::endl; - std::cout << "nc: " << nc << std::endl; +// colloc.x[i] = colloc_pt.x; +// colloc.y[i] = colloc_pt.y; +// colloc.z[i] = colloc_pt.z; +// } + +// void Mesh::compute_metrics_wing() { +// for (u64 i = 0; i < nb_panels_wing(); i++) { +// compute_metrics_i(i); +// } +// } + +// void Mesh::compute_metrics_wake() { +// for (u64 i = nb_panels_wing(); i < nb_panels_total(); i++) { +// compute_metrics_i(i); +// } +// } + +// // plot3d is chordwise major +// void Mesh::io_read_plot3d_structured(std::ifstream& f) { +// std::cout << "reading plot3d mesh" << std::endl; +// u64 ni = 0; // number of vertices chordwise +// u64 nj = 0; // number of vertices spanwise +// u64 nk = 0; +// u64 blocks = 0; +// f32 x, y, z; +// f >> blocks; +// if (blocks != 1) { +// throw std::runtime_error("Only single block plot3d mesh is supported"); +// } +// f >> ni >> nj >> nk; +// if (nk != 1) { +// throw std::runtime_error("Only 2D plot3d mesh is supported"); +// } + +// ns = nj - 1; +// nc = ni - 1; +// alloc(); + +// std::cout << "number of panels: " << nb_panels_wing() << std::endl; +// std::cout << "ns: " << ns << std::endl; +// std::cout << "nc: " << nc << std::endl; - for (u64 j = 0; j < nj; j++) { - for (u64 i = 0; i < ni; i++) { - f >> x; - v.x[nj*i + j] = x; - } - } - for (u64 j = 0; j < nj; j++) { - for (u64 i = 0; i < ni; i++) { - f >> y; - v.y[nj*i + j] = y; - } - } - for (u64 j = 0; j < nj; j++) { - for (u64 i = 0; i < ni; i++) { - f >> z; - v.z[nj*i + j] = z; - } - } - - // TODO: this validation is not robust enough when eventually we will be using multiple bodies - // Validation - // const f32 eps = std::numeric_limits::epsilon(); - // if (std::abs(m.v.x[0]) != eps || std::abs(m.v.y[0]) != eps) { - // throw std::runtime_error("First vertex of plot3d mesh must be at origin"); - // } - const f32 first_y = v.y[0]; - for (u64 i = 1; i < ni; i++) { - if (v.y[i * nj] != first_y) { - throw std::runtime_error("Mesh vertices should be ordered in chordwise direction"); - } - } -} - -void Mesh::io_read(const std::string& filename) { - const std::filesystem::path path(filename); - if (!std::filesystem::exists(path)) { - throw std::runtime_error("Mesh file not found"); - } - - std::ifstream f(path); - if (f.is_open()) { - if (path.extension() == ".x") { - io_read_plot3d_structured(f); - } else { - throw std::runtime_error("Only structured gridpro mesh format is supported"); - } - f.close(); - } else { - throw std::runtime_error("Failed to open mesh file"); - } -} - -void Mesh::move(const linalg::alias::float4x4& transform, const SoA_3D_t& origin_pos) { - // const tiny::ScopedTimer t("Mesh::move"); - assert(current_nw < nw); // check if we have capacity +// for (u64 j = 0; j < nj; j++) { +// for (u64 i = 0; i < ni; i++) { +// f >> x; +// v.x[nj*i + j] = x; +// } +// } +// for (u64 j = 0; j < nj; j++) { +// for (u64 i = 0; i < ni; i++) { +// f >> y; +// v.y[nj*i + j] = y; +// } +// } +// for (u64 j = 0; j < nj; j++) { +// for (u64 i = 0; i < ni; i++) { +// f >> z; +// v.z[nj*i + j] = z; +// } +// } + +// // TODO: this validation is not robust enough when eventually we will be using multiple bodies +// // Validation +// // const f32 eps = std::numeric_limits::epsilon(); +// // if (std::abs(m.v.x[0]) != eps || std::abs(m.v.y[0]) != eps) { +// // throw std::runtime_error("First vertex of plot3d mesh must be at origin"); +// // } +// const f32 first_y = v.y[0]; +// for (u64 i = 1; i < ni; i++) { +// if (v.y[i * nj] != first_y) { +// throw std::runtime_error("Mesh vertices should be ordered in chordwise direction"); +// } +// } +// } + +// void Mesh::io_read(const std::string& filename) { +// const std::filesystem::path path(filename); +// if (!std::filesystem::exists(path)) { +// throw std::runtime_error("Mesh file not found"); +// } + +// std::ifstream f(path); +// if (f.is_open()) { +// if (path.extension() == ".x") { +// io_read_plot3d_structured(f); +// } else { +// throw std::runtime_error("Only structured gridpro mesh format is supported"); +// } +// f.close(); +// } else { +// throw std::runtime_error("Failed to open mesh file"); +// } +// } + +// void Mesh::move(const linalg::alias::float4x4& transform, const SoA_3D_t& origin_pos) { +// // const tiny::ScopedTimer t("Mesh::move"); +// assert(current_nw < nw); // check if we have capacity - // Shed wake before moving - const u64 src_begin = nb_vertices_wing() - (ns + 1); - const u64 src_end = nb_vertices_wing(); - const u64 dst_begin = (nc + nw - current_nw) * (ns + 1); - std::copy(v.x.data() + src_begin, v.x.data() + src_end, v.x.data() + dst_begin); - std::copy(v.y.data() + src_begin, v.y.data() + src_end, v.y.data() + dst_begin); - std::copy(v.z.data() + src_begin, v.z.data() + src_end, v.z.data() + dst_begin); +// // Shed wake before moving +// const u64 src_begin = nb_vertices_wing() - (ns + 1); +// const u64 src_end = nb_vertices_wing(); +// const u64 dst_begin = (nc + nw - current_nw) * (ns + 1); +// std::copy(v.x.data() + src_begin, v.x.data() + src_end, v.x.data() + dst_begin); +// std::copy(v.y.data() + src_begin, v.y.data() + src_end, v.y.data() + dst_begin); +// std::copy(v.z.data() + src_begin, v.z.data() + src_end, v.z.data() + dst_begin); - // Perform the movement - for (u64 i = 0; i < nb_vertices_wing(); i++) { - const linalg::alias::float4 transformed_pt = linalg::mul(transform, linalg::alias::float4{origin_pos.x[i], origin_pos.y[i], origin_pos.z[i], 1.f}); - v.x[i] = transformed_pt.x; - v.y[i] = transformed_pt.y; - v.z[i] = transformed_pt.z; - } - - compute_metrics_wing(); // compute the new wing panel metrics - - // Copy new trailing edge vertices on the wake buffer - std::copy(v.x.data() + src_begin, v.x.data() + src_end, v.x.data() + dst_begin - (ns + 1)); - std::copy(v.y.data() + src_begin, v.y.data() + src_end, v.y.data() + dst_begin - (ns + 1)); - std::copy(v.z.data() + src_begin, v.z.data() + src_end, v.z.data() + dst_begin - (ns + 1)); - - current_nw++; -} - -void Mesh::resize_wake(const u64 nw_) { - nw = nw_; - alloc(); // resizes the buffers -} +// // Perform the movement +// for (u64 i = 0; i < nb_vertices_wing(); i++) { +// const linalg::alias::float4 transformed_pt = linalg::mul(transform, linalg::alias::float4{origin_pos.x[i], origin_pos.y[i], origin_pos.z[i], 1.f}); +// v.x[i] = transformed_pt.x; +// v.y[i] = transformed_pt.y; +// v.z[i] = transformed_pt.z; +// } + +// compute_metrics_wing(); // compute the new wing panel metrics + +// // Copy new trailing edge vertices on the wake buffer +// std::copy(v.x.data() + src_begin, v.x.data() + src_end, v.x.data() + dst_begin - (ns + 1)); +// std::copy(v.y.data() + src_begin, v.y.data() + src_end, v.y.data() + dst_begin - (ns + 1)); +// std::copy(v.z.data() + src_begin, v.z.data() + src_end, v.z.data() + dst_begin - (ns + 1)); + +// current_nw++; +// } + +// void Mesh::resize_wake(const u64 nw_) { +// nw = nw_; +// alloc(); // resizes the buffers +// }