diff --git a/TODO b/TODO
index 0133b21..998d62d 100644
--- a/TODO
+++ b/TODO
@@ -1,6 +1,6 @@
 - Move some of the logic from theodorsen test case to its own class
 - Consider calling delta_gamma directly from lu_solve
 - Get rid of exceptions
-
+- All backend functions should return a tf::Task so that we can build a single taskflow graph
 // store kinematics in the backend ??
 // store flow properties in the backend ??
\ No newline at end of file
diff --git a/vlm/backends/cpu/include/vlm_backend_cpu.hpp b/vlm/backends/cpu/include/vlm_backend_cpu.hpp
index ca7beee..85b597e 100644
--- a/vlm/backends/cpu/include/vlm_backend_cpu.hpp
+++ b/vlm/backends/cpu/include/vlm_backend_cpu.hpp
@@ -11,8 +11,8 @@ class BackendCPU final : public Backend {
     ~BackendCPU();
     void lhs_assemble(View>& lhs, const View& colloc, const View& normals, const View& verts_wing, const View& verts_wake, u32 iteration) override;
     void rhs_assemble_velocities(View& rhs, const View& normals, const View& velocities) override;
-    void rhs_assemble_wake_influence(View& rhs, const View& gamma) override;
-    void displace_wake_rollup(float dt, View& rollup_vertices, View& verts_wake, const View& verts_wing, const View& gamma) override;
+    void rhs_assemble_wake_influence(View& rhs, const View& gamma, const View& colloc, const View& normals, const View& verts_wake, u32 iteration) override;
+    void displace_wake_rollup(View& wake_rollup, const View& verts_wake, const View& verts_wing, const View& gamma_wing, const View& gamma_wake, f32 dt, u32 iteration) override;
     // void displace_wing(const linalg::alias::float4x4& transform) override;
     void displace_wing_and_shed(const std::vector& transform, View& verts_wing, View& verts_wake) override;
     void gamma_shed(View& gamma, View& gamma_prev) override;
diff --git a/vlm/backends/cpu/include/vlm_backend_cpu_kernels.hpp b/vlm/backends/cpu/include/vlm_backend_cpu_kernels.hpp
deleted file mode 100644
index e879f37..0000000
--- a/vlm/backends/cpu/include/vlm_backend_cpu_kernels.hpp
+++ /dev/null
@@ -1,13 +0,0 @@
-#include "vlm_types.hpp"
-
-namespace vlm {
-void kernel_influence(
-    u64 m, u64 n,
-    f32 lhs[],
-    f32 vx[], f32 vy[], f32 vz[],
-    f32 collocx[], f32 collocy[], f32 collocz[],
-    f32 normalx[], f32 normaly[], f32 normalz[],
-    u64 ia, u64 lidx, f32 sigma_p4
-    );
-}
-
diff --git a/vlm/backends/cpu/src/vlm_backend_cpu.cpp b/vlm/backends/cpu/src/vlm_backend_cpu.cpp
index 5e3522d..8ef0a18 100644
--- a/vlm/backends/cpu/src/vlm_backend_cpu.cpp
+++ b/vlm/backends/cpu/src/vlm_backend_cpu.cpp
@@ -1,5 +1,4 @@
 #include "vlm_backend_cpu.hpp"
-#include "vlm_backend_cpu_kernels.hpp"
 #include "vlm_backend_cpu_kernels_ispc.h"
 
 #include "linalg.h"
@@ -37,21 +36,6 @@ class MemoryCPU final : public Memory {
 BackendCPU::BackendCPU() : Backend(std::make_unique()) {}
 BackendCPU::~BackendCPU() = default;
 
-// void BackendCPU::reset() {
-//     const u64 nb_panels_wing = hd_mesh->nc * hd_mesh->ns;
-//     const u64 nb_vertices_wing = (hd_mesh->nc+1)*(hd_mesh->ns+1);
-
-
-//     // std::fill(gamma.begin(), gamma.end(), 0.0f);
-//     std::fill(hd_data->lhs, hd_data->lhs + nb_panels_wing*nb_panels_wing, 0.0f); // influence kernel is +=
-//     // std::fill(rhs.begin(), rhs.end(), 0.0f);
-//     memory->dd_copy(PTR_MESH_V(hd_mesh, 0, 0, 0), PTR_MESHGEOM_V(hd_mesh_geom, 0, 0, 0), nb_vertices_wing*sizeof(f32));
-//     memory->dd_copy(PTR_MESH_V(hd_mesh, 0, 0, 1), PTR_MESHGEOM_V(hd_mesh_geom, 0, 0, 1), nb_vertices_wing*sizeof(f32));
-//     memory->dd_copy(PTR_MESH_V(hd_mesh, 0, 0, 2), PTR_MESHGEOM_V(hd_mesh_geom, 0, 0, 2), nb_vertices_wing*sizeof(f32));
-
-//     dd_mesh->nwa = 0;
-// }
-
 /// @brief Compute the gamma_delta vector
 /// @details
 /// Compute the gamma_delta vector of the VLM system (\Delta\Gamma = \Gamma_{i,j} - \Gamma_{i-1,j})
@@ -181,39 +165,35 @@ void BackendCPU::rhs_assemble_velocities(View& rhs, const Vie
     }
 }
 
-void BackendCPU::rhs_assemble_wake_influence() {
+// export void kernel_wake_influence(uniform f32* uniform colloc, uniform u64 colloc_ld, uniform f32* uniform normals, uniform u64 normals_ld, uniform f32* uniform v, uniform u64 v_ld, uniform u64 v_m, uniform u64 v_n, uniform f32* uniform gamma, uniform f32* uniform rhs, uniform f32 sigma, uniform u32 iteration) {
+
+void BackendCPU::rhs_assemble_wake_influence(View& rhs, const View& gamma, const View& colloc, const View& normals, const View& verts_wake, u32 iteration) {
     // const tiny::ScopedTimer timer("Wake Influence");
+    assert(rhs.layout.stride() == rhs.size()); // single dim
+
     tf::Taskflow taskflow;
 
-    auto init = taskflow.placeholder();
-    auto wake_influence = taskflow.for_each_index((u64)0, ns * nc, [&] (u64 lidx) {
-        ispc::kernel_wake_influence(&mesh, lidx, dd_data->gamma, dd_data->rhs, sigma_vatistas);
+    auto begin = taskflow.placeholder();
+    auto end = taskflow.placeholder();
+
+    auto wake_influence = taskflow.for_each_index((u64)0, rhs.layout.stride(), [&] (u64 idx) {
+        for (u32 i = 0; i < rhs.layout.surfaces().size(); i++) {
+            ispc::kernel_wake_influence(colloc.ptr + idx, colloc.layout.stride(), normals.ptr + idx, normals.layout.stride(), verts_wake.ptr + verts_wake.layout.offset(i), verts_wake.layout.stride(), verts_wake.layout.nc(i), verts_wake.layout.ns(i), gamma.ptr + idx, rhs.ptr + idx, sigma_vatistas, iteration);
+        }
     });
-    auto sync = taskflow.placeholder();
-    init.precede(wake_influence);
-    wake_influence.precede(sync);
+
+    begin.precede(wake_influence);
+    wake_influence.precede(end);
 
     Executor::get().run(taskflow).wait();
 }
 
-void BackendCPU::displace_wake_rollup(float dt) {
-    const tiny::ScopedTimer timer("Wake Rollup");
-
-    const u64 nc = dd_mesh->nc;
-    const u64 ns = dd_mesh->ns;
-    const u64 nw = dd_mesh->nw;
-    const u64 nwa = dd_mesh->nwa;
+void BackendCPU::displace_wake_rollup(View& wake_rollup, const View& verts_wake, const View& verts_wing, const View& gamma_wing, const View& gamma_wake, f32 dt, u32 iteration) {
+    // const tiny::ScopedTimer timer("Wake Rollup");
 
     const u64 wake_vertices_begin = (nc + nw - nwa + 1) * (ns+1);
     const u64 wake_vertices_end = (nc + nw + 1) * (ns + 1);
 
-    const f32* rx = dd_data->rollup_vertices + (nc+nw+1)*(ns+1)*0;
-    const f32* ry = dd_data->rollup_vertices + (nc+nw+1)*(ns+1)*1;
-    const f32* rz = dd_data->rollup_vertices + (nc+nw+1)*(ns+1)*2;
-
-    ispc::Mesh mesh = mesh_to_ispc(dd_mesh);
-
     tf::Taskflow taskflow;
 
     auto init = taskflow.placeholder();
diff --git a/vlm/backends/cpu/src/vlm_backend_cpu_kernels.cpp b/vlm/backends/cpu/src/vlm_backend_cpu_kernels.cpp
deleted file mode 100644
index fa7268a..0000000
--- a/vlm/backends/cpu/src/vlm_backend_cpu_kernels.cpp
+++ /dev/null
@@ -1,69 +0,0 @@
-#include "vlm_backend_cpu_kernels.hpp"
-#include "linalg.h"
-#include "vlm_types.hpp"
-#include
-
-using namespace vlm;
-using namespace linalg::alias;
-
-inline float3 kernel_biosavart(const float3& colloc, const float3& vertex1, const float3& vertex2, const f32& sigma) {
-    const f32 rcut = 1e-10f; // highly tuned value
-    const float3 r0 = vertex2 - vertex1;
-    const float3 r1 = colloc - vertex1;
-    const float3 r2 = colloc - vertex2;
-    // Katz Plotkin, Low speed Aero | Eq 10.115
-    const float3 r1r2cross = linalg::cross(r1, r2);
-    const f32 r1_norm = linalg::length(r1);
-    const f32 r2_norm = linalg::length(r2);
-    const f32 square = linalg::length2(r1r2cross);
-    if ((r1_norm
[...]
-#define PTR_MESH_V(m, i,j,k) (m->vertices + (j) + (i) * (m->ns+1) + (k) * (m->nc+m->nw+1) * (m->ns+1))
-#define PTR_MESH_C(m, i,j,k) (m->colloc + (j) + (i) * (m->ns) + (k) * (m->nc) * (m->ns))
-#define PTR_MESH_N(m, i,j,k) (m->normals + (j) + (i) * (m->ns) + (k) * (m->nc) * (m->ns))
 
 template inline float3 kernel_biosavart(C& colloc, const V& vertex1, const V& vertex2, const uniform float& sigma) {
@@ -116,6 +92,7 @@ inline void kernel_symmetry(float3& inf, C colloc, const V& vertex0, const V& ve
 }
 
 // v_rld : influencer vertices row leading dimension = nb of points span wise
+// Influence of a wing vortex ring (vertex quad) on all the collocation points of all wings
 export void kernel_influence(uniform u64 m, uniform f32* uniform lhs, uniform f32* uniform collocs, uniform u64 collocs_ld, uniform f32* uniform v, uniform u64 v_ld, uniform u64 v_rld, uniform f32* uniform normals, uniform u64 normals_ld, uniform f32 sigma) {
 
     uniform float3 vertex0 = {v[0 * v_ld] , v[1 * v_ld] , v[2 * v_ld] };
@@ -147,32 +124,37 @@ export void kernel_influence(uniform u64 m, uniform f32* uniform lhs, uniform f3
     }
 }
 
-export void kernel_wake_influence(
-    uniform const Mesh* uniform m,
-    uniform uint64 lidx, uniform float* uniform gamma, uniform float* uniform rhs, uniform float sigma
-    ) {
-    const uniform uint64 nb_panels = m->ns * m->nc;
-    const uniform float3 colloc_influenced = {m->colloc[lidx + 0*nb_panels], m->colloc[lidx + 1*nb_panels], m->colloc[lidx + 2*nb_panels]};
+/// @brief Influence of a specific wing wake on a collocation point
+/// @param colloc ptr to x pos of influenced collocation point
+/// @param colloc_ld leading dimension of colloc
+/// @param normals ptr to x pos of normals
+/// @param normals_ld leading dimension of normals
+/// @param v ptr to beginning of wake vertices
+/// @param v_ld leading dimension of v (for each 3D dimension)
+/// @param v_m number of wake vertices chordwise
+/// @param v_n number of wake vertices spanwise
+/// @param gamma ptr to gamma value of influenced panel
+/// @param rhs ptr to rhs value of influenced panel
+/// @param sigma vortex core model factor
+/// @param iteration current iteration number
+export void kernel_wake_influence(uniform f32* uniform colloc, uniform u64 colloc_ld, uniform f32* uniform normals, uniform u64 normals_ld, uniform f32* uniform v, uniform u64 v_ld, uniform u64 v_m, uniform u64 v_n, uniform f32* uniform gamma, uniform f32* uniform rhs, uniform f32 sigma, uniform u32 iteration) {
+    const uniform float3 colloc_influenced = {colloc[0*colloc_ld], colloc[1*colloc_ld], colloc[2*colloc_ld]};
 
     float3 induced_vel = {0.0f, 0.0f, 0.0f};
 
-    const uniform float* uniform vx = PTR_MESH_V(m, 0, 0, 0);
-    const uniform float* uniform vy = PTR_MESH_V(m, 0, 0, 1);
-    const uniform float* uniform vz = PTR_MESH_V(m, 0, 0, 2);
-
     // Wake influence on wing
-    for (uint64 i = m->nc + m->nw - m->nwa; i < m->nc + m->nw; i++) {
-        foreach(j = 0 ... m->ns) {
-            uint64 v0 = (i+0) * (m->ns+1) + j;
-            uint64 v1 = (i+0) * (m->ns+1) + j + 1;
-            uint64 v2 = (i+1) * (m->ns+1) + j + 1;
-            uint64 v3 = (i+1) * (m->ns+1) + j;
+    for (u64 i = v_m - iteration - 1; i < v_m-1; i++) {
+        foreach(j = 0 ... v_n - 1) {
+            uint64 v0 = (i+0) * v_n + j;
+            uint64 v1 = (i+0) * v_n + j + 1;
+            uint64 v2 = (i+1) * v_n + j + 1;
+            uint64 v3 = (i+1) * v_n + j;
 
             // Loads
-            const float3 vertex0 = {vx[v0], vy[v0], vz[v0]};
-            const float3 vertex1 = {vx[v1], vy[v1], vz[v1]};
-            const float3 vertex2 = {vx[v2], vy[v2], vz[v2]};
-            const float3 vertex3 = {vx[v3], vy[v3], vz[v3]};
+            const float3 vertex0 = {v[0*v_ld + v0], v[1*v_ld + v0], v[2*v_ld + v0]};
+            const float3 vertex1 = {v[0*v_ld + v1], v[1*v_ld + v1], v[2*v_ld + v1]};
+            const float3 vertex2 = {v[0*v_ld + v2], v[1*v_ld + v2], v[2*v_ld + v2]};
+            const float3 vertex3 = {v[0*v_ld + v3], v[1*v_ld + v3], v[2*v_ld + v3]};
 
             float3 ind = {0.0f, 0.0f, 0.0f};
             kernel_symmetry(ind, colloc_influenced, vertex0, vertex1, sigma);
@@ -180,49 +162,50 @@ export void kernel_wake_influence(
             kernel_symmetry(ind, colloc_influenced, vertex2, vertex3, sigma);
             kernel_symmetry(ind, colloc_influenced, vertex3, vertex0, sigma);
 
-            induced_vel += ind * gamma[i * m->ns + j];
+            induced_vel += ind * gamma[i * (v_n-1) + j];
         }
     }
 
-    const uniform float* uniform nx = PTR_MESH_N(m, 0, 0, 0);
-    const uniform float* uniform ny = PTR_MESH_N(m, 0, 0, 1);
-    const uniform float* uniform nz = PTR_MESH_N(m, 0, 0, 2);
-
-    const uniform float3 normal = {nx[lidx], ny[lidx], nz[lidx]};
+    const uniform float3 normal = {normals[0*normals_ld], normals[1*normals_ld], normals[2*normals_ld]};
     const float induced_vel_dot_normal = dot(induced_vel, normal); // vertical 2 fma + mul
-    rhs[lidx] -= reduce_add(induced_vel_dot_normal);
+    *rhs -= reduce_add(induced_vel_dot_normal);
 }
 
-export void kernel_rollup(
-    uniform const Mesh* uniform m, uniform float dt,
-    uniform float* uniform rollup,
-    uniform uint64 vidx, uniform float* uniform gamma, uniform float sigma
-    ) {
-    const uniform uint64 nb_verts_total = (m->nc+m->nw+1) * (m->ns+1);
-
-    const uniform float* uniform vx = PTR_MESH_V(m, 0, 0, 0);
-    const uniform float* uniform vy = PTR_MESH_V(m, 0, 0, 1);
-    const uniform float* uniform vz = PTR_MESH_V(m, 0, 0, 2);
-    uniform float* uniform rollup_vx = rollup + 0 * nb_verts_total;
-    uniform float* uniform rollup_vy = rollup + 1 * nb_verts_total;
-    uniform float* uniform rollup_vz = rollup + 2 * nb_verts_total;
-    const uniform float3 vertex_influenced = {vx[vidx], vy[vidx], vz[vidx]};
+/// @brief Compute the new position of the wake vertices due to the rollup effect (influence of wing and wake vortex rings)
+/// @param v_wake wake vortex ring vertices (ptr to the start of this wing's wake)
+/// @param v_wake_ld leading dimension (for x, y and z) of the wake vertices
+/// @param v_wake_m number of wake vertices chordwise
+/// @param v_wake_n number of wake vertices spanwise
+/// @param v_wake_idx index of the wake vertex to be updated
+/// @param v_wing wing vortex ring vertices (ptr to the start of this wing's vertices)
+/// @param v_wing_ld leading dimension (for x, y and z) of the wing vertices
+/// @param v_wing_m number of wing vertices chordwise
+/// @param v_wing_n number of wing vertices spanwise
+/// @param rollup output buffer for the displaced (rolled-up) wake vertices
+/// @param rollup_ld leading dimension (for x, y and z) of rollup
+/// @param gamma_wing gamma vector of the wing panels
+/// @param gamma_wake gamma vector of the wake panels
+/// @param sigma vortex core model factor
+/// @param dt time step
+/// @param iteration current iteration number
+export void kernel_rollup(uniform f32* uniform v_wake, uniform u64 v_wake_ld, uniform u64 v_wake_m, uniform u64 v_wake_n, uniform u64 v_wake_idx, uniform f32* uniform v_wing, uniform u64 v_wing_ld, uniform u64 v_wing_m, uniform u64 v_wing_n, uniform f32* uniform rollup, uniform u64 rollup_ld, uniform f32* uniform gamma_wing, uniform f32* uniform gamma_wake, uniform f32 sigma, uniform f32 dt, uniform u32 iteration) {
+    const uniform float3 vertex_influenced = {v_wake[0*v_wake_ld + v_wake_idx], v_wake[1*v_wake_ld + v_wake_idx], v_wake[2*v_wake_ld + v_wake_idx]};
 
     float3 induced_vel = {0.0f, 0.0f, 0.0f};
 
     // Wing influence
-    for (uint64 i = 0; i < m->nc; i++) {
-        foreach(j = 0 ... m->ns) {
-            uint64 v0 = (i+0) * (m->ns+1) + j;
-            uint64 v1 = (i+0) * (m->ns+1) + j + 1;
-            uint64 v2 = (i+1) * (m->ns+1) + j + 1;
-            uint64 v3 = (i+1) * (m->ns+1) + j;
+    for (uint64 i = 0; i < v_wing_m - 1; i++) {
+        foreach(j = 0 ... v_wing_n - 1) {
+            uint64 v0 = (i+0) * (v_wing_n) + j;
+            uint64 v1 = (i+0) * (v_wing_n) + j + 1;
+            uint64 v2 = (i+1) * (v_wing_n) + j + 1;
+            uint64 v3 = (i+1) * (v_wing_n) + j;
 
             // Loads
-            const float3 vertex0 = {vx[v0], vy[v0], vz[v0]};
-            const float3 vertex1 = {vx[v1], vy[v1], vz[v1]};
-            const float3 vertex2 = {vx[v2], vy[v2], vz[v2]};
-            const float3 vertex3 = {vx[v3], vy[v3], vz[v3]};
+            const float3 vertex0 = {v_wing[0*v_wing_ld + v0], v_wing[1*v_wing_ld + v0], v_wing[2*v_wing_ld + v0]};
+            const float3 vertex1 = {v_wing[0*v_wing_ld + v1], v_wing[1*v_wing_ld + v1], v_wing[2*v_wing_ld + v1]};
+            const float3 vertex2 = {v_wing[0*v_wing_ld + v2], v_wing[1*v_wing_ld + v2], v_wing[2*v_wing_ld + v2]};
+            const float3 vertex3 = {v_wing[0*v_wing_ld + v3], v_wing[1*v_wing_ld + v3], v_wing[2*v_wing_ld + v3]};
 
             float3 ind = {0.0f, 0.0f, 0.0f};
             kernel_symmetry(ind, vertex_influenced, vertex0, vertex1, sigma);
@@ -230,23 +213,23 @@ export void kernel_rollup(
             kernel_symmetry(ind, vertex_influenced, vertex2, vertex3, sigma);
             kernel_symmetry(ind, vertex_influenced, vertex3, vertex0, sigma);
 
-            induced_vel += ind * gamma[i * m->ns + j];
+            induced_vel += ind * gamma_wing[i * (v_wing_n-1) + j];
         }
     }
 
     // Wake influence
-    for (uint64 i = m->nc + m->nw - m->nwa; i < m->nc + m->nw; i++) {
-        foreach(j = 0 ... m->ns) {
-            uint64 v0 = (i+0) * (m->ns+1) + j;
-            uint64 v1 = (i+0) * (m->ns+1) + j + 1;
-            uint64 v2 = (i+1) * (m->ns+1) + j + 1;
-            uint64 v3 = (i+1) * (m->ns+1) + j;
+    for (u64 i = v_wake_m - iteration - 1; i < v_wake_m-1; i++) {
+        foreach(j = 0 ... v_wake_n - 1) {
+            u64 v0 = (i+0) * (v_wake_n) + j;
+            u64 v1 = (i+0) * (v_wake_n) + j + 1;
+            u64 v2 = (i+1) * (v_wake_n) + j + 1;
+            u64 v3 = (i+1) * (v_wake_n) + j;
 
             // Loads
-            const float3 vertex0 = {vx[v0], vy[v0], vz[v0]};
-            const float3 vertex1 = {vx[v1], vy[v1], vz[v1]};
-            const float3 vertex2 = {vx[v2], vy[v2], vz[v2]};
-            const float3 vertex3 = {vx[v3], vy[v3], vz[v3]};
+            const float3 vertex0 = {v_wake[0*v_wake_ld + v0], v_wake[1*v_wake_ld + v0], v_wake[2*v_wake_ld + v0]};
+            const float3 vertex1 = {v_wake[0*v_wake_ld + v1], v_wake[1*v_wake_ld + v1], v_wake[2*v_wake_ld + v1]};
+            const float3 vertex2 = {v_wake[0*v_wake_ld + v2], v_wake[1*v_wake_ld + v2], v_wake[2*v_wake_ld + v2]};
+            const float3 vertex3 = {v_wake[0*v_wake_ld + v3], v_wake[1*v_wake_ld + v3], v_wake[2*v_wake_ld + v3]};
 
             float3 ind = {0.0f, 0.0f, 0.0f};
             kernel_symmetry(ind, vertex_influenced, vertex0, vertex1, sigma);
@@ -254,143 +237,79 @@ export void kernel_rollup(
             kernel_symmetry(ind, vertex_influenced, vertex2, vertex3, sigma);
             kernel_symmetry(ind, vertex_influenced, vertex3, vertex0, sigma);
 
-            induced_vel += ind * gamma[i * m->ns + j];
+            induced_vel += ind * gamma_wake[i * (v_wake_n-1) + j];
         }
     }
 
-    rollup_vx[vidx] = vx[vidx] + reduce_add(induced_vel.x) * dt;
-    rollup_vy[vidx] = vy[vidx] + reduce_add(induced_vel.y) * dt;
-    rollup_vz[vidx] = vz[vidx] + reduce_add(induced_vel.z) * dt;
+    rollup[0*rollup_ld + v_wake_idx] = vertex_influenced.x + reduce_add(induced_vel.x) * dt;
+    rollup[1*rollup_ld + v_wake_idx] = vertex_influenced.y + reduce_add(induced_vel.y) * dt;
+    rollup[2*rollup_ld + v_wake_idx] = vertex_influenced.z + reduce_add(induced_vel.z) * dt;
 }
 
-// export uniform float kernel_trefftz_cd(
-//     uniform const Mesh* uniform m,
-//     uniform float* uniform gamma,
-//     uniform float* uniform trefftz_buffer,
-//     uniform uint64 j, uniform uint64 n, uniform float sigma
-//     ) {
-//     const uniform uint64 nb_panels = m->ns * m->nc;
-//     uniform uint64 begin = nb_panels + j;
-//     uniform uint64 end = begin + n;
-//     float cd = 0.0f;
-
-//     const uniform float* uniform vx = PTR_MESH_V(m, 0, 0, 0);
-//     const uniform float* uniform vy = PTR_MESH_V(m, 0, 0, 1);
-//     const uniform float* uniform vz = PTR_MESH_V(m, 0, 0, 2);
-//     const uniform float* uniform cx = PTR_MESH_C(m, 0, 0, 0);
-//     const uniform float* uniform cy = PTR_MESH_C(m, 0, 0, 1);
-//     const uniform float* uniform cz = PTR_MESH_C(m, 0, 0, 2);
-//     const uniform float* uniform nx = PTR_MESH_N(m, 0, 0, 0);
-//     const uniform float* uniform ny = PTR_MESH_N(m, 0, 0, 1);
-//     const uniform float* uniform nz = PTR_MESH_N(m, 0, 0, 2);
-
-//     // Compute the induced velocity of the streamwise wake vortex segments
-//     for (uniform uint64 ia = nb_panels; ia < nb_panels + m->ns; ia++) {
-//         const uniform uint64 v0 = ia + ia / m->ns;
-//         const uniform uint64 v1 = v0 + 1;
-//         const uniform uint64 v3 = v0 + m->ns+1;
-//         const uniform uint64 v2 = v3 + 1;
-
-//         // Broadcast vertices
-//         const float3 vertex0 = {vx[v0], vy[v0], vz[v0]};
-//         const float3 vertex1 = {vx[v1], vy[v1], vz[v1]};
-//         const float3 vertex2 = {vx[v2], vy[v2], vz[v2]};
-//         const float3 vertex3 = {vx[v3], vy[v3], vz[v3]};
-
-//         uniform float gammaw = gamma[ia - m->ns];
-//         foreach(ia2 = begin ... end) {
-//             const float3 colloc = {cx[ia2], cy[ia2], cz[ia2]};
-//             const float3 normal = {nx[ia2], ny[ia2], nz[ia2]};
-//             float3 inf = {0.0f, 0.0f, 0.0f};
-
-//             kernel_symmetry(inf, colloc, vertex1, vertex2, sigma);
-//             kernel_symmetry(inf, colloc, vertex3, vertex0, sigma);
-//             trefftz_buffer[ia2 - begin + j] += gammaw * dot(inf, normal); // store
-//         }
-//     }
-//     // Perform the integration (Katz Plotkin, Low speed Aero | Eq 8.146)
-//     foreach(i = j ... j + n) {
-//         uint64 li = (m->nc-1) * m->ns + i;
-//         float3 v0 = {vx[i], vy[i], vz[i]};
-//         float3 v1 = {vx[i+1], vy[i+1], vz[i+1]};
-//         float dl = length(v1 - v0);
-//         cd -= gamma[li] * trefftz_buffer[i] * dl; // used to have 0.5f * flow.rho
-//     }
-//     return reduce_add(cd);
-// }
-
-export uniform float kernel_trefftz_cd2(
-    uniform const Mesh* uniform m,
-    uniform float* uniform gamma,
-    uniform uint64 jp, uniform uint64 n, uniform float sigma
-    ) {
+export uniform float kernel_trefftz_cd(uniform f32* uniform v_wake, uniform u64 v_wake_ld, uniform u64 v_wake_m, uniform u64 v_wake_n, uniform float* uniform gamma_wake, uniform f32 sigma) {
     varying float cd = 0.0f;
 
-    const uniform float* uniform vx = PTR_MESH_V(m, 0, 0, 0);
-    const uniform float* uniform vy = PTR_MESH_V(m, 0, 0, 1);
-    const uniform float* uniform vz = PTR_MESH_V(m, 0, 0, 2);
-
     // Loop over first wake panel spanwise section
-    const uniform uint64 i = m->nc;
+    const uniform u64 i = v_wake_m - 2; // 2nd to last wake chordwise index
 
     // parallel for
-    for (uniform uint64 j = jp; j < jp+n; j++) {
-        uniform uint64 v0 = (i+0) * (m->ns+1) + j;
-        uniform uint64 v1 = (i+0) * (m->ns+1) + j + 1;
-        uniform uint64 v2 = (i+1) * (m->ns+1) + j + 1;
-        uniform uint64 v3 = (i+1) * (m->ns+1) + j;
+    for (uniform u64 j = 0; j < v_wake_n - 1; j++) {
+        uniform u64 v0 = (i+0) * (v_wake_n) + j;
+        uniform u64 v1 = (i+0) * (v_wake_n) + j + 1;
+        uniform u64 v2 = (i+1) * (v_wake_n) + j + 1;
+        uniform u64 v3 = (i+1) * (v_wake_n) + j;
 
         // Loads
-        const uniform float3 vertex0 = {vx[v0], vy[v0], vz[v0]};
-        const uniform float3 vertex1 = {vx[v1], vy[v1], vz[v1]};
-        const uniform float3 vertex2 = {vx[v2], vy[v2], vz[v2]};
-        const uniform float3 vertex3 = {vx[v3], vy[v3], vz[v3]};
+        const uniform float3 vertex0 = {v_wake[0*v_wake_ld + v0], v_wake[1*v_wake_ld + v0], v_wake[2*v_wake_ld + v0]};
+        const uniform float3 vertex1 = {v_wake[0*v_wake_ld + v1], v_wake[1*v_wake_ld + v1], v_wake[2*v_wake_ld + v1]};
+        const uniform float3 vertex2 = {v_wake[0*v_wake_ld + v2], v_wake[1*v_wake_ld + v2], v_wake[2*v_wake_ld + v2]};
+        const uniform float3 vertex3 = {v_wake[0*v_wake_ld + v3], v_wake[1*v_wake_ld + v3], v_wake[2*v_wake_ld + v3]};
 
         const uniform float3 colloc = 0.25f * (vertex0 + vertex1 + vertex2 + vertex3); // 3*(3 add + 1 mul)
         varying float3 inf = {0.0f, 0.0f, 0.0f};
 
-        foreach(jj = jp ... jp+n) {
-            varying uint64 vv0 = (i+0) * (m->ns+1) + jj;
-            varying uint64 vv1 = (i+0) * (m->ns+1) + jj + 1;
-            varying uint64 vv2 = (i+1) * (m->ns+1) + jj + 1;
-            varying uint64 vv3 = (i+1) * (m->ns+1) + jj;
+        foreach(jj = 0 ... v_wake_n - 1) {
+            varying u64 vv0 = (i+0) * (v_wake_n) + jj;
+            varying u64 vv1 = (i+0) * (v_wake_n) + jj + 1;
+            varying u64 vv2 = (i+1) * (v_wake_n) + jj + 1;
+            varying u64 vv3 = (i+1) * (v_wake_n) + jj;
 
             // Loads
-            const varying float3 vvertex0 = {vx[vv0], vy[vv0], vz[vv0]};
-            const varying float3 vvertex1 = {vx[vv1], vy[vv1], vz[vv1]};
-            const varying float3 vvertex2 = {vx[vv2], vy[vv2], vz[vv2]};
-            const varying float3 vvertex3 = {vx[vv3], vy[vv3], vz[vv3]};
+            const varying float3 vvertex0 = {v_wake[0*v_wake_ld + vv0], v_wake[1*v_wake_ld + vv0], v_wake[2*v_wake_ld + vv0]};
+            const varying float3 vvertex1 = {v_wake[0*v_wake_ld + vv1], v_wake[1*v_wake_ld + vv1], v_wake[2*v_wake_ld + vv1]};
+            const varying float3 vvertex2 = {v_wake[0*v_wake_ld + vv2], v_wake[1*v_wake_ld + vv2], v_wake[2*v_wake_ld + vv2]};
+            const varying float3 vvertex3 = {v_wake[0*v_wake_ld + vv3], v_wake[1*v_wake_ld + vv3], v_wake[2*v_wake_ld + vv3]};
 
             varying float3 inf2 = {0.0f, 0.0f, 0.0f};
             kernel_symmetry(inf2, colloc, vvertex1, vvertex2, sigma);
             kernel_symmetry(inf2, colloc, vvertex3, vvertex0, sigma);
 
-            varying float gammaw = gamma[(m->nc-1)*m->ns + jj];
+            varying float gammaw = gamma_wake[i * (v_wake_n-1) + jj];
 
             inf += gammaw * inf2;
         }
 
         const uniform float3 normal = quad_normal(vertex0, vertex1, vertex2, vertex3);
         const varying float induced_vel = dot(inf, normal);
-        cd -= gamma[(m->nc-1)*m->ns + j] * induced_vel * length(vertex1 - vertex0);
+        cd -= gamma_wake[i * (v_wake_n-1) + j] * induced_vel * length(vertex1 - vertex0);
     }
 
     return reduce_add(cd); // hadd, hadd, extract
 }
 
-export uniform float kernel_trefftz_cl(
-    uniform const Mesh* uniform m,
-    uniform float* uniform gamma,
-    uniform uint64 j, uniform uint64 n
-    ) {
-    float cl = 0.0f;
-    const uniform float* uniform vx = PTR_MESH_V(m, 0, 0, 0);
-    const uniform float* uniform vy = PTR_MESH_V(m, 0, 0, 1);
-    const uniform float* uniform vz = PTR_MESH_V(m, 0, 0, 2);
-    foreach(i = j ... j + n) {
-        uint64 li = (m->nc-1) * m->ns + i;
-        float3 v0 = {vx[i], vy[i], vz[i]};
-        float3 v1 = {vx[i+1], vy[i+1], vz[i+1]};
-        float dl = length(v1 - v0);
-        cl += gamma[li]* dl; // used to have 0.5f * flow.rho
-    }
-    return reduce_add(cl);
-}
+// export uniform float kernel_trefftz_cl(
+//     uniform const Mesh* uniform m,
+//     uniform float* uniform gamma,
+//     uniform uint64 j, uniform uint64 n
+//     ) {
+//     float cl = 0.0f;
+//     const uniform float* uniform vx = PTR_MESH_V(m, 0, 0, 0);
+//     const uniform float* uniform vy = PTR_MESH_V(m, 0, 0, 1);
+//     const uniform float* uniform vz = PTR_MESH_V(m, 0, 0, 2);
+//     foreach(i = j ... j + n) {
+//         uint64 li = (m->nc-1) * m->ns + i;
+//         float3 v0 = {vx[i], vy[i], vz[i]};
+//         float3 v1 = {vx[i+1], vy[i+1], vz[i+1]};
+//         float dl = length(v1 - v0);
+//         cl += gamma[li]* dl; // used to have 0.5f * flow.rho
+//     }
+//     return reduce_add(cl);
+// }
diff --git a/vlm/include/vlm_backend.hpp b/vlm/include/vlm_backend.hpp
index 0184d4f..5d6e649 100644
--- a/vlm/include/vlm_backend.hpp
+++ b/vlm/include/vlm_backend.hpp
@@ -23,8 +23,8 @@ class Backend {
     // Kernels that run for all the meshes
     virtual void lhs_assemble(View>& lhs, const View& colloc, const View& normals, const View& verts_wing, const View& verts_wake, u32 iteration) = 0;
    virtual void rhs_assemble_velocities(View& rhs, const View& normals, const View& velocities) = 0;
-    virtual void rhs_assemble_wake_influence(View& rhs, const View& gamma) = 0;
-    virtual void displace_wake_rollup(float dt, View& rollup_vertices, View& verts_wake, const View& verts_wing, const View& gamma) = 0;
+    virtual void rhs_assemble_wake_influence(View& rhs, const View& gamma, const View& colloc, const View& normals, const View& verts_wake, u32 iteration) = 0;
+    virtual void displace_wake_rollup(View& wake_rollup, const View& verts_wake, const View& verts_wing, const View& gamma_wing, const View& gamma_wake, f32 dt, u32 iteration) = 0;
     // virtual void displace_wing(const linalg::alias::float4x4& transform) = 0;
     virtual void displace_wing_and_shed(const std::vector& transform, View& verts_wing, View& verts_wake) = 0;
     virtual void gamma_shed(View& gamma, View& gamma_prev) = 0;
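
Note on the reworked kernels (illustrative, not part of the patch): the new kernel_wake_influence and kernel_rollup signatures assume SoA vertex storage where a leading dimension separates the x, y and z planes, a wake grid of v_m x v_n vertices of which only the last `iteration` chordwise panel rows are active, and per-panel circulation stored as gamma[i * (v_n - 1) + j]. The plain C++ sketch below mirrors that indexing for a single collocation point. Every name in it (sketch::wake_influence_scalar, segment_induced_velocity, load_vertex) is hypothetical, the Vatistas core model driven by sigma is replaced by a plain cutoff, and the spanwise symmetry contribution handled by kernel_symmetry is omitted.

#include <cmath>
#include <cstdint>

namespace sketch {

using f32 = float;
using u32 = std::uint32_t;
using u64 = std::uint64_t;

struct float3 { f32 x, y, z; };

inline float3 operator-(const float3& a, const float3& b) { return {a.x - b.x, a.y - b.y, a.z - b.z}; }
inline float3 cross(const float3& a, const float3& b) {
    return {a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x};
}
inline f32 dot(const float3& a, const float3& b) { return a.x * b.x + a.y * b.y + a.z * b.z; }

// Velocity induced at p by a unit-circulation straight vortex segment a -> b
// (Katz & Plotkin, Low-Speed Aerodynamics, Eq. 10.115); a small-distance cutoff
// stands in for the Vatistas core model used by the real kernels.
inline float3 segment_induced_velocity(const float3& p, const float3& a, const float3& b) {
    const float3 r0 = b - a, r1 = p - a, r2 = p - b;
    const float3 c = cross(r1, r2);
    const f32 c2 = dot(c, c);
    const f32 l1 = std::sqrt(dot(r1, r1));
    const f32 l2 = std::sqrt(dot(r2, r2));
    const f32 eps = 1e-10f;
    if (c2 < eps || l1 < eps || l2 < eps) return {0.0f, 0.0f, 0.0f};
    const f32 k = (dot(r0, r1) / l1 - dot(r0, r2) / l2) / (4.0f * 3.14159265f * c2);
    return {k * c.x, k * c.y, k * c.z};
}

// Gather one vertex from the SoA layout: x plane at [0*ld], y at [1*ld], z at [2*ld].
inline float3 load_vertex(const f32* v, u64 ld, u64 idx) {
    return {v[0 * ld + idx], v[1 * ld + idx], v[2 * ld + idx]};
}

// Wake influence on a single collocation point: accumulate the velocity induced by every
// active wake vortex ring and subtract its projection on the panel normal from the RHS entry.
inline void wake_influence_scalar(const float3& colloc, const float3& normal,
                                  const f32* v, u64 v_ld, u64 v_m, u64 v_n,
                                  const f32* gamma_wake, u32 iteration, f32* rhs) {
    float3 w{0.0f, 0.0f, 0.0f};
    for (u64 i = v_m - iteration - 1; i + 1 < v_m; i++) {  // only the shed (active) rows
        for (u64 j = 0; j + 1 < v_n; j++) {                // spanwise panels
            const u64 i0 = (i + 0) * v_n + j;
            const u64 i1 = (i + 0) * v_n + j + 1;
            const u64 i2 = (i + 1) * v_n + j + 1;
            const u64 i3 = (i + 1) * v_n + j;
            const float3 p0 = load_vertex(v, v_ld, i0), p1 = load_vertex(v, v_ld, i1);
            const float3 p2 = load_vertex(v, v_ld, i2), p3 = load_vertex(v, v_ld, i3);
            // Ring = 4 segments traversed p0 -> p1 -> p2 -> p3 -> p0
            const float3 e0 = segment_induced_velocity(colloc, p0, p1);
            const float3 e1 = segment_induced_velocity(colloc, p1, p2);
            const float3 e2 = segment_induced_velocity(colloc, p2, p3);
            const float3 e3 = segment_induced_velocity(colloc, p3, p0);
            const f32 g = gamma_wake[i * (v_n - 1) + j];   // circulation of panel (i, j)
            w.x += g * (e0.x + e1.x + e2.x + e3.x);
            w.y += g * (e0.y + e1.y + e2.y + e3.y);
            w.z += g * (e0.z + e1.z + e2.z + e3.z);
        }
    }
    *rhs -= dot(w, normal);
}

} // namespace sketch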