Skip to content

Commit

Permalink
add ispc cd trefftz kernel
Browse files Browse the repository at this point in the history
  • Loading branch information
samayala22 committed Feb 14, 2024
1 parent 655d7e7 commit 1f33f32
Show file tree
Hide file tree
Showing 3 changed files with 121 additions and 146 deletions.
12 changes: 6 additions & 6 deletions headeronly/tinytimer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,24 +5,24 @@

namespace tiny {

constexpr static long long us_in_s = 1'000'000LL;
constexpr static long long us_in_ms = 1'000LL;
constexpr static double us_in_s = 1'000'000;
constexpr static double us_in_ms = 1'000;

class ScopedTimer {
public:
ScopedTimer(const char* name) : m_name(name), m_start(std::chrono::high_resolution_clock::now()) {}

~ScopedTimer() {
const auto m_end = std::chrono::high_resolution_clock::now();
const auto duration = static_cast<long long>(std::chrono::duration_cast<std::chrono::microseconds>(m_end - m_start).count());
const auto duration = static_cast<double>(std::chrono::duration_cast<std::chrono::microseconds>(m_end - m_start).count());
std::printf("%s: ", m_name);

if (duration > us_in_s) {
std::printf("%lld s\n", duration / us_in_s);
std::printf("%.2f s\n", duration / us_in_s);
} else if (duration > us_in_ms) {
std::printf("%lld ms\n", duration / us_in_ms);
std::printf("%.0f ms\n", duration / us_in_ms);
} else {
std::printf("%lld us\n", duration);
std::printf("%.0f us\n", duration);
}
}
private:
Expand Down
184 changes: 52 additions & 132 deletions vlm/backends/cpu/src/vlm_backend_cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,9 @@
#include "vlm_utils.hpp"
#include "vlm_executor.hpp" // includes taskflow/taskflow.hpp

#include <algorithm>
#include <cstdio>
#include <fstream>
#include <immintrin.h>
#include <algorithm> // std::fill
#include <cstdio> // std::printf

#include <limits>
#include <taskflow/algorithm/for_each.hpp>

#include <lapacke.h>
Expand Down Expand Up @@ -135,29 +132,29 @@ void BackendCPU::lu_solve() {
LAPACKE_sgetrs(LAPACK_COL_MAJOR, 'N', n, 1, lhs.data(), n, ipiv.data(), gamma.data(), n);
}

f32 BackendCPU::compute_coefficient_cl(const FlowData& flow, const f32 area,
const u32 j, const u32 n) {
assert(n > 0);
assert(j >= 0 && j+n <= mesh.ns);
// f32 BackendCPU::compute_coefficient_cl(const FlowData& flow, const f32 area,
// const u32 j, const u32 n) {
// assert(n > 0);
// assert(j >= 0 && j+n <= mesh.ns);

f32 cl = 0.0f;

for (u32 u = 0; u < mesh.nc; u++) {
for (u32 v = j; v < j + n; v++) {
const u32 li = u * mesh.ns + v; // linear index
const linalg::alias::float3 v0 = mesh.get_v0(li);
const linalg::alias::float3 v1 = mesh.get_v1(li);
// Leading edge vector pointing outward from wing root
const linalg::alias::float3 dl = v1 - v0;
// Distance from the center of leading edge to the reference point
const linalg::alias::float3 force = linalg::cross(flow.freestream, dl) * flow.rho * delta_gamma[li];
cl += linalg::dot(force, flow.lift_axis);
}
}
cl /= 0.5f * flow.rho * linalg::length2(flow.freestream) * area;

return cl;
}
// f32 cl = 0.0f;

// for (u32 u = 0; u < mesh.nc; u++) {
// for (u32 v = j; v < j + n; v++) {
// const u32 li = u * mesh.ns + v; // linear index
// const linalg::alias::float3 v0 = mesh.get_v0(li);
// const linalg::alias::float3 v1 = mesh.get_v1(li);
// // Leading edge vector pointing outward from wing root
// const linalg::alias::float3 dl = v1 - v0;
// // Distance from the center of leading edge to the reference point
// const linalg::alias::float3 force = linalg::cross(flow.freestream, dl) * flow.rho * delta_gamma[li];
// cl += linalg::dot(force, flow.lift_axis);
// }
// }
// cl /= 0.5f * flow.rho * linalg::length2(flow.freestream) * area;

// return cl;
// }

linalg::alias::float3 BackendCPU::compute_coefficient_cm(
const FlowData& flow,
Expand Down Expand Up @@ -188,75 +185,6 @@ linalg::alias::float3 BackendCPU::compute_coefficient_cm(
return cm;
}

inline void kernel_biosavart(f32& vx, f32& vy, f32& vz, f32& x, f32& y, f32& z, f32& x1, f32& y1, f32& z1, f32& x2, f32& y2, f32& z2) {
static const f32 rcut = 1.0e-10f;
vx = 0.0f;
vy = 0.0f;
vz = 0.0f;

// Katz Plotkin, Low speed Aero | Eq 10.115
const f32 r1r2x = (y-y1)*(z-z2) - (z-z1)*(y-y2);
const f32 r1r2y = -((x-x1)*(z-z2) - (z-z1)*(x-x2));
const f32 r1r2z = (x-x1)*(y-y2) - (y-y1)*(x-x2);

const f32 r1 = std::sqrt(pow<2>(x-x1)+pow<2>(y-y1)+pow<2>(z-z1));
const f32 r2 = std::sqrt(pow<2>(x-x2)+pow<2>(y-y2)+pow<2>(z-z2));
const f32 square = pow<2>(r1r2x) + pow<2>(r1r2y) + pow<2>(r1r2z);
if ((r1<rcut) || (r2<rcut) || (square<rcut)) return;

const f32 r0r1 = (x2-x1)*(x-x1)+(y2-y1)*(y-y1)+(z2-z1)*(z-z1);
const f32 r0r2 = (x2-x1)*(x-x2)+(y2-y1)*(y-y2)+(z2-z1)*(z-z2);
const f32 coeff = 1.0f/(4.0f*PI_f*square) * (r0r1/r1 - r0r2/r2);

vx = coeff * r1r2x;
vy = coeff * r1r2y;
vz = coeff * r1r2z;
}

inline void kernel_symmetry(f32& inf_x, f32& inf_y, f32& inf_z, f32 x, f32 y, f32 z, f32 x1, f32 y1, f32 z1, f32 x2, f32 y2, f32 z2) {
f32 vx, vy, vz;
kernel_biosavart(vx, vy, vz, x, y, z, x1, y1, z1, x2, y2, z2);
inf_x += vx;
inf_y += vy;
inf_z += vz;
y = -y; // wing symmetry
kernel_biosavart(vx, vy, vz, x, y, z, x1, y1, z1, x2, y2, z2);
inf_x += vx;
inf_y -= vy;
inf_z += vz;
}

using namespace linalg::alias;

inline float3 kernel_biosavart(const float3& colloc, const float3& vertex1, const float3& vertex2, const f32& sigma) {
const f32 rcut = 1e-10f; // highly tuned value
const float3 r0 = vertex2 - vertex1;
const float3 r1 = colloc - vertex1;
const float3 r2 = colloc - vertex2;
// Katz Plotkin, Low speed Aero | Eq 10.115
const float3 r1r2cross = linalg::cross(r1, r2);
const f32 r1_norm = linalg::length(r1);
const f32 r2_norm = linalg::length(r2);
const f32 square = linalg::length2(r1r2cross);
if ((r1_norm<rcut) || (r2_norm<rcut) || (square<rcut)) {
return float3{0.0f, 0.0f, 0.0f};
}

const f32 smoother = sigma*sigma*linalg::length2(r0);
const f32 coeff = (linalg::dot(r0,r1)/r1_norm - linalg::dot(r0,r2)/r2_norm) / (4.0f*PI_f*std::sqrt(square*square + smoother*smoother));
return r1r2cross * coeff;
}

inline void kernel_symmetry(float3& inf, float3 colloc, const float3& vertex0, const float3& vertex1, const f32 sigma) {
float3 induced_speed = kernel_biosavart(colloc, vertex0, vertex1, sigma);
inf += induced_speed;
colloc.y = -colloc.y; // wing symmetry
float3 induced_speed_sym = kernel_biosavart(colloc, vertex0, vertex1, sigma);
inf.x += induced_speed_sym.x;
inf.y -= induced_speed_sym.y;
inf.z += induced_speed_sym.z;
}

f32 BackendCPU::compute_coefficient_cd(
const FlowData& flow,
const f32 area,
Expand All @@ -267,42 +195,34 @@ f32 BackendCPU::compute_coefficient_cd(
assert(n > 0);
assert(j >= 0 && j+n <= mesh.ns);
std::fill(trefftz_buffer.begin(), trefftz_buffer.end(), 0.0f);

f32 cd = 0.0f;
// Drag coefficent computed using Trefftz plane
const u32 begin = j + mesh.nb_panels_wing();
const u32 end = begin + n;
// parallel for
for (u32 ia = begin; ia < end; ia++) {
const u32 v0 = ia + ia / mesh.ns;
const u32 v1 = v0 + 1;
const u32 v3 = v0 + n+1;
const u32 v2 = v3 + 1;

const float3 vertex0{mesh.v.x[v0], mesh.v.y[v0], mesh.v.z[v0]};
const float3 vertex1{mesh.v.x[v1], mesh.v.y[v1], mesh.v.z[v1]};
const float3 vertex2{mesh.v.x[v2], mesh.v.y[v2], mesh.v.z[v2]};
const float3 vertex3{mesh.v.x[v3], mesh.v.y[v3], mesh.v.z[v3]};

const f32 gammaw = gamma[ia - mesh.ns];

for (u32 ia2 = begin; ia2 < end; ia2++) {
const float3 colloc(mesh.colloc.x[ia2], mesh.colloc.y[ia2], mesh.colloc.z[ia2]);
const float3 normal(mesh.normal.x[ia2], mesh.normal.y[ia2], mesh.normal.z[ia2]);
linalg::alias::float3 inf2(0.f, 0.f, 0.f);
// Influence from the streamwise vortex lines
kernel_symmetry(inf2, colloc, vertex1, vertex2, flow.sigma_vatistas);
kernel_symmetry(inf2, colloc, vertex3, vertex0, flow.sigma_vatistas);
// This is the induced velocity calculated with the vortex (gamma) calculated earlier (according to kutta condition)
trefftz_buffer[ia2 - begin] += gammaw * linalg::dot(inf2, normal);
}
}

for (u32 i = 0; i < mesh.ns; i++) {
const u32 li = (mesh.nc-1) * mesh.ns + i;
const f32 dl = mesh.strip_width(i);
cd -= gamma[li] * trefftz_buffer[i] * dl; // used to have 0.5f * flow.rho
}
Mesh& m = mesh;
ispc::MeshProxy mesh_proxy = {
m.ns, m.nc, m.nb_panels_wing(),
{m.v.x.data(), m.v.y.data(), m.v.z.data()},
{m.colloc.x.data(), m.colloc.y.data(), m.colloc.z.data()},
{m.normal.x.data(), m.normal.y.data(), m.normal.z.data()}
};
f32 cd = ispc::kernel_trefftz_cd(mesh_proxy, gamma.data(), trefftz_buffer.data(), j, n, flow.sigma_vatistas);
cd /= linalg::length2(flow.freestream) * area;
return cd;
}
}

// Using Trefftz plane
f32 BackendCPU::compute_coefficient_cl(
const FlowData& flow,
const f32 area,
const u32 j,
const u32 n)
{
Mesh& m = mesh;
ispc::MeshProxy mesh_proxy = {
m.ns, m.nc, m.nb_panels_wing(),
{m.v.x.data(), m.v.y.data(), m.v.z.data()},
{m.colloc.x.data(), m.colloc.y.data(), m.colloc.z.data()},
{m.normal.x.data(), m.normal.y.data(), m.normal.z.data()}
};
f32 cl = ispc::kernel_trefftz_cl(mesh_proxy, gamma.data(), j, n);
cl /= 0.5f * flow.rho * linalg::length2(flow.freestream) * area;
return cl;
}
71 changes: 63 additions & 8 deletions vlm/backends/cpu/src/vlm_backend_cpu_kernels.ispc
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ export struct MeshProxy {
};

// Bio-savart Kernel
#define RCUT 1e-10f // highly tuned value
#define RCUT 1e-10f
#define RCUT2 1e-5f

#define PI_f 3.141593f

Expand All @@ -58,7 +59,7 @@ inline float3 kernel_biosavart(float3& colloc, const uniform float3& vertex1, co
float r2_norm = length(r2);
float square = length2(r1r2cross);

if ((r1_norm<RCUT) || (r2_norm<RCUT) || (square<RCUT)) {
if ((square<RCUT) || (r1_norm<RCUT2) || (r2_norm<RCUT2)) {
float3 res = {0.0f, 0.0f, 0.0f};
return res;
}
Expand Down Expand Up @@ -110,9 +111,63 @@ export void kernel_influence(
}
}

// export uniform float kernel_trefftz_cd(
// uniform const MeshProxy& m,
// uniform float* uniform gamma,
// uniform unsigned int ia, uniform float sigma
// ) {
// }
export uniform float kernel_trefftz_cd(
uniform const MeshProxy& m,
uniform float* uniform gamma,
uniform float* uniform trefftz_buffer,
uniform unsigned int j, uniform unsigned int n, uniform float sigma
) {
uniform unsigned int begin = m.nb_panels + j;
uniform unsigned int end = begin + n;
float cd = 0.0f;

// Compute the induced velocity of the streamwise wake vortex segments
for (uniform unsigned int ia = m.nb_panels; ia < m.nb_panels + m.ns; ia++) {
const uniform unsigned int v0 = ia + ia / m.ns;
const uniform unsigned int v1 = v0 + 1;
const uniform unsigned int v3 = v0 + m.ns+1;
const uniform unsigned int v2 = v3 + 1;

// Broadcast vertices
const uniform float3 vertex0 = {m.v.x[v0], m.v.y[v0], m.v.z[v0]};
const uniform float3 vertex1 = {m.v.x[v1], m.v.y[v1], m.v.z[v1]};
const uniform float3 vertex2 = {m.v.x[v2], m.v.y[v2], m.v.z[v2]};
const uniform float3 vertex3 = {m.v.x[v3], m.v.y[v3], m.v.z[v3]};

uniform float gammaw = gamma[ia - m.ns];
foreach(ia2 = begin ... end) {
const float3 colloc = {m.colloc.x[ia2], m.colloc.y[ia2], m.colloc.z[ia2]};
const float3 normal = {m.normal.x[ia2], m.normal.y[ia2], m.normal.z[ia2]};
float3 inf = {0.0f, 0.0f, 0.0f};

kernel_symmetry(inf, colloc, vertex1, vertex2, sigma);
kernel_symmetry(inf, colloc, vertex3, vertex0, sigma);
trefftz_buffer[ia2 - begin + j] += gammaw * dot(inf, normal); // store
}
}
// Perform the integration (Katz Plotkin, Low speed Aero | Eq 8.146)
foreach(i = j ... j + n) {
unsigned int li = (m.nc-1) * m.ns + i;
float3 v0 = {m.v.x[i], m.v.y[i], m.v.z[i]};
float3 v1 = {m.v.x[i+1], m.v.y[i+1], m.v.z[i+1]};
float dl = length(v1 - v0);
cd -= gamma[li] * trefftz_buffer[i] * dl; // used to have 0.5f * flow.rho
}
return reduce_add(cd);
}

export uniform float kernel_trefftz_cl(
uniform const MeshProxy& m,
uniform float* uniform gamma,
uniform unsigned int j, uniform unsigned int n
) {
float cl = 0.0f;
foreach(i = j ... j + n) {
unsigned int li = (m.nc-1) * m.ns + i;
float3 v0 = {m.v.x[i], m.v.y[i], m.v.z[i]};
float3 v1 = {m.v.x[i+1], m.v.y[i+1], m.v.z[i+1]};
float dl = length(v1 - v0);
cl += gamma[li]* dl; // used to have 0.5f * flow.rho
}
return reduce_add(cl);
}

0 comments on commit 1f33f32

Please sign in to comment.