Skip to content

Commit

Permalink
wip sync
Browse files Browse the repository at this point in the history
  • Loading branch information
samayala22 committed Jun 11, 2024
1 parent b808564 commit 06a83c1
Show file tree
Hide file tree
Showing 11 changed files with 707 additions and 601 deletions.
5 changes: 4 additions & 1 deletion vlm/backends/cpu/include/vlm_backend_cpu.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ namespace vlm {

class BackendCPU : public Backend {
public:
BackendCPU();
BackendCPU(MeshGeom* mesh, u64 timesteps);
~BackendCPU();
void reset() override;
void lhs_assemble() override;
Expand All @@ -24,6 +24,9 @@ class BackendCPU : public Backend {
void compute_delta_gamma() override;
void set_velocities(const linalg::alias::float3& vel) override;
void set_velocities(const SoA_3D_t<f32>& vels) override;
private:
f32 mesh_mac(u64 j, u64 n) override; // mean chord
f32 mesh_area(const u64 i, const u64 j, const u64 m, const u64 n) override; // mean span
};

} // namespace vlm
146 changes: 113 additions & 33 deletions vlm/backends/cpu/src/vlm_backend_cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,49 +20,42 @@
#include <cblas.h>

using namespace vlm;

template<typename T>
void print_buffer(const T* start, u64 size) {
std::cout << "[";
for (u64 i = 0; i < size; i++) {
std::cout << start[i] << ",";
}
std::cout << "]\n";
}

using namespace linalg::ostream_overloads;

BackendCPU::~BackendCPU() = default; // Destructor definition

// TODO: replace any kind of size arithmetic with methods
BackendCPU::BackendCPU(Mesh& mesh) : Backend(mesh) {
lhs.resize(mesh.nb_panels_wing() * mesh.nb_panels_wing());

rollup_vertices.resize(mesh.nb_vertices_total()); // TODO: exclude the wing vertices
local_velocities.resize(mesh.nb_panels_wing());
rhs.resize(mesh.nb_panels_wing());
ipiv.resize(mesh.nb_panels_wing());
gamma.resize((mesh.nc + mesh.nw) * mesh.ns); // store wake gamma as well
gamma_prev.resize(mesh.nb_panels_wing());
delta_gamma.resize(mesh.nb_panels_wing());
trefftz_buffer.resize(mesh.ns);
BackendCPU::BackendCPU(MeshGeom* mesh, u64 timesteps) {
allocator.h_malloc = std::malloc;
allocator.d_malloc = std::malloc;
allocator.h_free = std::free;
allocator.d_free = std::free;
allocator.hh_memcpy = std::memcpy;
allocator.hd_memcpy = std::memcpy;
allocator.dh_memcpy = std::memcpy;
allocator.dd_memcpy = std::memcpy;
allocator.h_memset = std::memset;
allocator.d_memset = std::memset;
init(mesh, timesteps);
}

void BackendCPU::reset() {
// TODO: verify that these are needed
std::fill(gamma.begin(), gamma.end(), 0.0f);
std::fill(lhs.begin(), lhs.end(), 0.0f);
std::fill(rhs.begin(), rhs.end(), 0.0f);
// std::fill(gamma.begin(), gamma.end(), 0.0f);
// std::fill(lhs.begin(), lhs.end(), 0.0f);
// std::fill(rhs.begin(), rhs.end(), 0.0f);
}

void BackendCPU::compute_delta_gamma() {
const u64 nc = hd_mesh->nc;
const u64 ns = hd_mesh->ns;
const u64 nw = hd_mesh->nw;
// const tiny::ScopedTimer timer("Delta Gamma");
std::copy(gamma.data(), gamma.data()+mesh.ns, delta_gamma.data());
std::copy(dd_data->gamma, dd_data->gamma+ns, dd_data->delta_gamma);

// note: this is efficient as the memory is contiguous
for (u64 i = 1; i < mesh.nc; i++) {
for (u64 j = 0; j < mesh.ns; j++) {
delta_gamma[i*mesh.ns + j] = gamma[i*mesh.ns + j] - gamma[(i-1)*mesh.ns + j];
for (u64 i = 1; i < nc; i++) {
for (u64 j = 0; j < ns; j++) {
dd_data->delta_gamma[i*ns + j] = dd_data->gamma[i*ns + j] - dd_data->gamma[(i-1)*ns + j];
}
}
}
Expand Down Expand Up @@ -114,10 +107,16 @@ void BackendCPU::lhs_assemble() {

void BackendCPU::compute_rhs() {
const tiny::ScopedTimer timer("RHS");
const Mesh& m = mesh;

for (u64 i = 0; i < m.nb_panels_wing(); i++) {
rhs[i] = - (local_velocities.x[i] * m.normal.x[i] + local_velocities.y[i] * m.normal.y[i] + local_velocities.z[i] * m.normal.z[i]);
const u64 nc = hd_mesh->nc;
const u64 ns = hd_mesh->ns;
const u64 nw = hd_mesh->nw;
const u64 nb_panels_wing = nc * ns;

for (u64 i = 0; i < nb_panels_wing(); i++) {
rhs[i] = - (
dd_data->local_velocities[i + 0 * nb_panels_wing] * dd_mesh->normal[i + 0 * nb_panels_wing] +
dd_data->local_velocities[i + 1 * nb_panels_wing] * dd_mesh->normal[i + 1 * nb_panels_wing] +
dd_data->local_velocities[i + 2 * nb_panels_wing] * dd_mesh->normal[i + 2 * nb_panels_wing]);
}
}

Expand Down Expand Up @@ -361,3 +360,84 @@ void BackendCPU::set_velocities(const SoA_3D_t<f32>& vels) {
std::copy(vels.y.begin(), vels.y.end(), local_velocities.y.begin());
std::copy(vels.z.begin(), vels.z.end(), local_velocities.z.begin());
}

/// @brief Computes the chord length of a chordwise segment
/// @details Since the mesh follows the camber line, the chord length is computed
/// as the distance between the first and last vertex of a chordwise segment
/// @param j chordwise segment index
f32 Mesh::chord_length(const u64 j) const {
const f32 dx = v.x[j + nc * (ns+1)] - v.x[j];
const f32 dy = 0.0f; // chordwise segments are parallel to the x axis
const f32 dz = v.z[j + nc * (ns+1)] - v.z[j];

return std::sqrt(dx * dx + dy * dy + dz * dz);
}

// i: chordwise index, j: spanwise index, k: x,y,z dim
#define PTR_MESH_V(m, i,j,k) (m->vertices + j + i * (m->ns+1) + k * (m->nc+m->nw+1) * (m->ns+1))

/// @brief Computes the mean chord of a set of panels
/// @details
/// Mean Aerodynamic Chord = \frac{2}{S} \int_{0}^{b/2} c(y)^{2} dy
/// Integration using the Trapezoidal Rule
/// Validated against empirical formulas for tapered wings
/// @param j first panel index spanwise
/// @param n number of panels spanwise
/// @return mean chord of the set of panels
f32 BackendCPU::mesh_mac(u64 j, u64 n) {
assert(j + n <= hd_mesh->ns); // spanwise range
assert(n > 0);
u64 nc = hd_mesh->nc;
u64 ns = hd_mesh->ns;
u64 nw = hd_mesh->nw;

// Leading edge vertex
f32* lvx = PTR_MESH_V(hd_mesh, 0,0,0);
f32* lvy = PTR_MESH_V(hd_mesh, 0,0,1);
f32* lvz = PTR_MESH_V(hd_mesh, 0,0,2);
// Trailing edge vertex
f32* tvx = PTR_MESH_V(hd_mesh, nc,0,0);
f32* tvy = PTR_MESH_V(hd_mesh, nc,0,1);
f32* tvz = PTR_MESH_V(hd_mesh, nc,0,2);

f32 mac = 0.0f;
// loop over panel chordwise sections in spanwise direction
// Note: can be done optimally with vertical fused simd
for (u64 v = j; v < j+n; v++) {
// left and right chord lengths
const f32 dx0 = tvx[v] - lvx[v];
const f32 dy0 = tvy[v] - lvy[v];
const f32 dz0 = tvz[v] - lvz[v];
const f32 dx1 = tvx[v + 1] - lvx[v + 1];
const f32 dy1 = tvy[v + 1] - lvy[v + 1];
const f32 dz1 = tvz[v + 1] - lvz[v + 1];
const f32 c0 = std::sqrt(dx0 * dx0 + dy0 * dy0 + dz0 * dz0);
const f32 c1 = std::sqrt(dx1 * dx1 + dy1 * dy1 + dz1 * dz1);
// Panel width
const f32 dx3 = lvx[v + 1] - lvx[v];
const f32 dy3 = lvy[v + 1] - lvy[v];
const f32 dz3 = lvz[v + 1] - lvz[v];
const f32 width = std::sqrt(dx3 * dx3 + dy3 * dy3 + dz3 * dz3);

mac += 0.5f * (c0 * c0 + c1 * c1) * width;
}
// Since we divide by half the total wing area (both sides) we dont need to multiply by 2
return mac / mesh_area(0, j, nc, n);
}

f32 BackendCPU::mesh_area(const u64 i, const u64 j, const u64 m, const u64 n) {
assert(i + m <= nc);
assert(j + n <= ns);
u64 nc = hd_mesh->nc;
u64 ns = hd_mesh->ns;
u64 nw = hd_mesh->nw;

const f32* areas = hd_mesh->area + j + i * (m->ns);
f32 area_sum = 0.0f;
for (u64 u = 0; u < m; u++) {
for (u64 v = 0; v < n; v++) {
area_sum += areas[v + u * ns];
}
}
return area_sum;
}
62 changes: 32 additions & 30 deletions vlm/backends/cpu/src/vlm_backend_cpu_kernels.ispc
Original file line number Diff line number Diff line change
Expand Up @@ -27,37 +27,37 @@ inline uniform float length2(const uniform float3& v) {return LENGTH2(v);}
inline float length(const float3& v) {return LENGTH(v);}
inline uniform float length(const uniform float3& v) {return LENGTH(v);}

// Aggregate Structures
export struct SoA3D {
uniform float* uniform x;
uniform float* uniform y;
uniform float* uniform z;
};

export struct MeshProxy {
uniform uint64 ns;
uniform uint64 nc;
uniform uint64 nb_panels;
uniform SoA3D v; // vertices
uniform SoA3D colloc; // collocation points
uniform SoA3D normal; // normals
};

export struct MeshView {
uniform uint64 ni; // wing chord panels
uniform uint64 nj; // wing span panels
uniform uint64 nw; // wake chord capacity
uniform uint64 cw; // number of active wake panels
uniform SoA3D v; // vertices
uniform SoA3D colloc; // collocation points
uniform SoA3D normal; // normals
struct Mesh2 {
f32* vertices = nullptr; // (nc+nw+1)*(ns+1)*3
// TODO: evaluate if we really need those values or we can just compute on the fly in the kernels
f32* normals = nullptr; // nc*ns*3
f32* colloc = nullptr; // nc*ns*3
f32* area = nullptr; // nc*ns

u64 nc = 0; // number of wing panels chordwise
u64 ns = 0; // number of wing panels spanwise
u64 nw = 0; // number of wake panels chordwise
u64 nwa = 0; // number of wake panels active chordwise

f32 s_ref = 0.0f; // reference area
f32 c_ref = 0.0f; // reference chord
f32 frame[16] = {
1.0f, 0.0f, 0.0f, 0.0f,
0.0f, 1.0f, 0.0f, 0.0f,
0.0f, 0.0f, 1.0f, 0.0f,
0.0f, 0.0f, 0.0f, 1.0f
}; // Col major order (TODO: move this to kinematic tracker)

char* name = nullptr; // mesh name
bool lifting = true;
};

// Bio-savart Kernel
#define RCUT 1e-10f
#define RCUT2 1e-5f

#define PI_f 3.141593f
#define PTR_MESH_V(m, i,j,k) (m->vertices + j + i * (m->ns+1) + k * (m->nc+m->nw+1) * (m->ns+1))

template<typename C, typename V>
inline float3 kernel_biosavart(C& colloc, const V& vertex1, const V& vertex2, const uniform float& sigma) {
Expand Down Expand Up @@ -95,7 +95,7 @@ inline void kernel_symmetry(float3& inf, C colloc, const V& vertex0, const V& ve
}

export void kernel_influence(
uniform const MeshProxy& m,
uniform const Mesh2* uniform m,
uniform float* uniform lhs,
uniform uint64 ia, uniform uint64 lidx, uniform float sigma
) {
Expand All @@ -105,12 +105,14 @@ export void kernel_influence(
const uniform uint64 v3 = v0 + m.ns+1;
const uniform uint64 v2 = v3 + 1;


// Broadcast vertices
uniform float3 vertex0 = {m.v.x[v0], m.v.y[v0], m.v.z[v0]};
uniform float3 vertex1 = {m.v.x[v1], m.v.y[v1], m.v.z[v1]};
uniform float3 vertex2 = {m.v.x[v2], m.v.y[v2], m.v.z[v2]};
uniform float3 vertex3 = {m.v.x[v3], m.v.y[v3], m.v.z[v3]};
const uniform float* uniform vx = PTR_MESH_V(m, 0, 0, 0);
const uniform float* uniform vy = PTR_MESH_V(m, 0, 0, 1);
const uniform float* uniform vz = PTR_MESH_V(m, 0, 0, 2);
uniform float3 vertex0 = {vx[v0], vy[v0], vz[v0]};
uniform float3 vertex1 = {vx[v1], vy[v1], vz[v1]};
uniform float3 vertex2 = {vx[v2], vy[v2], vz[v2]};
uniform float3 vertex3 = {vx[v3], vy[v3], vz[v3]};

// print("Influencer %: \n", lidx);
// print("Vertex 0: % % %\n", vertex0.x, vertex0.y, vertex0.z);
Expand Down
14 changes: 1 addition & 13 deletions vlm/backends/cuda/include/vlm_backend_cuda.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,21 +25,9 @@ struct MeshProxy {

class BackendCUDA : public Backend {
public:
BackendCPU default_backend; // temporary

float* d_lhs = nullptr;
float* d_rhs = nullptr;
float* d_gamma = nullptr;
float* d_delta_gamma = nullptr;

int* d_solver_info = nullptr;
int* d_solver_ipiv = nullptr;
float* d_solver_buffer = nullptr;

MeshProxy h_mesh;
MeshProxy* d_mesh;

BackendCUDA(Mesh& mesh);
BackendCUDA(MeshGeom* mesh, u64 timesteps);
~BackendCUDA();

void reset() override;
Expand Down
6 changes: 3 additions & 3 deletions vlm/include/vlm_allocator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@

namespace vlm {

typedef void* (*malloc_f)(unsigned long long size);
typedef void* (*malloc_f)(size_t size);
typedef void (*free_f)(void* ptr);
typedef void* (*memcpy_f)(void* dst, const void* src, unsigned long long size);
typedef void* (*memset_f)(void* dst, int value, unsigned long long size);
typedef void* (*memcpy_f)(void* dst, const void* src, size_t size);
typedef void* (*memset_f)(void* dst, int value, size_t size);

struct Allocator {
malloc_f h_malloc;
Expand Down
16 changes: 12 additions & 4 deletions vlm/include/vlm_backend.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,21 @@ class Backend {
MeshGeom* dd_mesh_geom;

// Mutable meshes (temporal state)
Mesh2* hh_mesh; // host ptr to host buffers for io
// Mesh2* hh_mesh; // host ptr to host buffers for io
Mesh2* hd_mesh; // host ptr to device buffers
Mesh2* dd_mesh; // device ptr to device buffers for kernels

Data* hd_data;
Data* dd_data;

i32* d_solver_info = nullptr;
i32* d_solver_ipiv = nullptr;
f32* d_solver_buffer = nullptr;

f32 sigma_vatistas = 0.0f;
Backend(MeshGeom* mesh_geom);
void init(u64 timesteps);
Backend() = default;
~Backend();
void init(MeshGeom* mesh_geom, u64 timesteps); // Acts as delayed constructor
virtual void reset() = 0;
virtual void lhs_assemble() = 0;
virtual void compute_rhs() = 0;
Expand All @@ -46,7 +51,10 @@ class Backend {
virtual void compute_delta_gamma() = 0;
virtual void set_velocities(const linalg::alias::float3& vel) = 0;
virtual void set_velocities(const SoA_3D_t<f32>& vels) = 0;
virtual ~Backend() = default;

private:
virtual f32 mesh_mac(u64 j, u64 n) = 0; // mean chord
virtual f32 mesh_area(const u64 i, const u64 j, const u64 m, const u64 n) = 0; // mean span
};

std::unique_ptr<Backend> create_backend(const std::string& backend_name, const MeshGeom* mesh, int timesteps);
Expand Down
22 changes: 10 additions & 12 deletions vlm/include/vlm_data.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,23 @@

#include "vlm_types.hpp"
#include "vlm_fwd.hpp"
#include "vlm_allocator.hpp"
#include "tinyinterpolate.hpp"

namespace vlm {

struct Data {
f32* lhs = nullptr;
f32* rhs = nullptr;
f32* gamma = nullptr;
f32* delta_gamma = nullptr;
f32* rollup_vertices = nullptr;
f32* local_velocities = nullptr;
f32* trefftz_buffer = nullptr;

i32* solver_info = nullptr;
i32* solver_ipiv = nullptr;
f32* solver_buffer = nullptr;
f32* lhs = nullptr; // (ns*nc)^2
f32* rhs = nullptr; // ns*nc
f32* gamma = nullptr; // (nc+nw)*nw
f32* delta_gamma = nullptr; // nc*ns
f32* rollup_vertices = nullptr; // (nc+nw+1)*(ns+1)*3
f32* local_velocities = nullptr; // ns*nc*3
f32* trefftz_buffer = nullptr; // ns TODO: can we get rid of this ??
};

void data_alloc(const Allocator* allocator, Data* data, u64 n, u64 m);
void data_alloc(const malloc_f malloc, Data* data, u64 nc, u64 ns, u64 nw);
void data_free(const free_f free, Data* data);

// Flow characteristics
class FlowData {
Expand Down
Loading

0 comments on commit 06a83c1

Please sign in to comment.