diff --git a/headeronly/tinyview.hpp b/headeronly/tinyview.hpp
index 1d22021..2b3a373 100644
--- a/headeronly/tinyview.hpp
+++ b/headeronly/tinyview.hpp
@@ -14,7 +14,7 @@ namespace tiny {
 
 using Range = std::array;
 
-constexpr Range all{0,-1};
+constexpr Range all{0, -1};
 
 template
 struct CountRanges;
@@ -31,115 +31,122 @@ struct CountRanges {
 
 template
 class View {
-    public:
-    constexpr View(T* const ptr, const std::array& dims) : _ptr(ptr), _dims(dims), _strides(compute_strides(dims)) {}
-    constexpr View(T* const ptr, const std::array& dims, const std::array& strides) : _ptr(ptr), _dims(dims), _strides(strides) {}
-    constexpr View(const View& other) : _ptr(other._ptr), _dims(other._dims), _strides(other._strides) {}
-    template
-    TINY_INLINE constexpr T& operator()(std::size_t first, Idx... idx) {return _ptr[compute_index(first, idx...)];}
-
-    template
-    TINY_INLINE constexpr T* ptr(std::size_t first, Idx... idx) {return _ptr + compute_index(first, idx...);}
-
-    template
-    TINY_INLINE constexpr auto view(Args... args) {
-        constexpr std::size_t M = CountRanges::value;
-        static_assert(sizeof...(args) == N, "The number of indices must match the dimension N.");
-        static_assert(M <= N, "Too many ranges provided compared to the view's dimensionality");
-
-        T* newPtr = _ptr;
-        std::array newDims, newStrides;
-        std::size_t newDimIndex = 0;
-
-        size_t argIndex = 0;
-        ([&](auto& arg) {
-            if constexpr (std::is_same, Range>::value) {
-                constexpr size_t first = arg[0];
-                constexpr size_t last = (arg[1] < 0) ? _dims[argIndex] + arg[1] + 1 : arg[1];
-                assert((first >= 0) && (first < _dims[argIndex]));
-                assert((last >= 0) && (last < _dims[argIndex]));
-                assert(last - first > 0);
-                newPtr += first * _strides[argIndex];
-                newDims[newDimIndex] = last - first;
-                newStrides[newDimIndex] = _strides[argIndex];
-                newDimIndex++;
-            } else if constexpr (std::is_integral>::value) {
-                constexpr size_t real_arg = (arg < 0) ? _dims[argIndex] + arg + 1 : arg;
-                newPtr += real_arg * _strides[argIndex];
-            }
-            argIndex++;
-        }(args), ...);
-
-        return View(newPtr, newDims, newStrides);
-    }
-
-    TINY_INLINE constexpr T* data() {return _ptr;}
-    TINY_INLINE constexpr View& operator=(const View& other) = default;
-
-    TINY_INLINE constexpr const std::size_t& dims(std::size_t idx) const {return _dims[idx];}
-    TINY_INLINE constexpr const std::size_t& strides(std::size_t idx) const {return _strides[idx];}
-
-    private:
-    template
-    TINY_INLINE constexpr std::size_t compute_index(std::size_t first, Idx... idx) {
-        static_assert(sizeof...(idx) == N-1, "The number of indices must match the dimension N.");
-        static_assert((std::is_integral>::value && ...), "All indices must be integers");
-
-        #ifndef NDEBUG
-        std::size_t ii = 1;
-        assert(((idx < _dims[ii++]) && ...));
-        #endif
-
-        std::size_t index = first;
-        std::size_t i = 1;
-        ((index += idx * _strides[i++]), ...);
-        return index;
-    }
-
-    TINY_INLINE constexpr static std::array compute_strides(const std::array& dims) {
-        std::array s{};
-        s[0] = 1;
-        for (std::size_t i = 1; i < N; ++i) {
-            s[i] = s[i - 1] * dims[i - 1];
-        }
-        return s;
-    }
-
-    T* const _ptr;
-    const std::array _dims;
-    const std::array _strides;
+public:
+    constexpr View(T* const ptr, const std::array& dims)
+        : _ptr(ptr), _dims(dims), _strides(compute_strides(dims)) {}
+
+    constexpr View(T* const ptr, const std::array& dims, const std::array& strides)
+        : _ptr(ptr), _dims(dims), _strides(strides) {}
+
+    constexpr View(const View& other)
+        : _ptr(other._ptr), _dims(other._dims), _strides(other._strides) {}
+
+    template
+    TINY_INLINE T& operator()(std::size_t first, Idx... idx) const { return _ptr[compute_index(first, idx...)]; }
+
+    template
+    TINY_INLINE T* ptr(std::size_t first, Idx... idx) const { return _ptr + compute_index(first, idx...); }
+
+    template
+    TINY_INLINE auto view(Args... args) { // Removed 'constexpr'
+        constexpr std::size_t M = CountRanges::value;
+        static_assert(sizeof...(args) == N, "The number of indices must match the dimension N.");
+        static_assert(M <= N, "Too many ranges provided compared to the view's dimensionality");
+
+        T* newPtr = _ptr;
+        std::array newDims{};
+        std::array newStrides{};
+        std::size_t newDimIndex = 0;
+
+        std::size_t argIndex = 0;
+        ([&](auto& arg) {
+            if constexpr (std::is_same, Range>::value) {
+                std::size_t first = arg[0]; // Removed 'constexpr'
+                std::size_t last = (arg[1] < 0) ? _dims[argIndex] + arg[1] + 1 : arg[1];
+                assert((first >= 0) && (first < _dims[argIndex]));
+                assert((last >= 0) && (last <= _dims[argIndex])); // Changed '<' to '<=' for upper bound
+                assert(last - first > 0);
+                newPtr += first * _strides[argIndex];
+                newDims[newDimIndex] = last - first;
+                newStrides[newDimIndex] = _strides[argIndex];
+                newDimIndex++;
+            } else if constexpr (std::is_integral>::value) {
+                std::size_t real_arg = (arg < 0) ? _dims[argIndex] + arg : arg; // Removed '+1' as indexing starts from 0
+                assert((real_arg >= 0) && (real_arg < _dims[argIndex]));
+                newPtr += real_arg * _strides[argIndex];
+            }
+            argIndex++;
+        }(args), ...);
+
+        return View(newPtr, newDims, newStrides);
+    }
+
+    TINY_INLINE T* data() const { return _ptr; }
+    TINY_INLINE View& operator=(const View& other) = default;
+
+    TINY_INLINE const std::size_t& dims(std::size_t idx) const { return _dims[idx]; }
+    TINY_INLINE const std::size_t& strides(std::size_t idx) const { return _strides[idx]; }
+
+private:
+    template
+    std::size_t compute_index(std::size_t first, Idx... idx) const { // Removed 'constexpr'
+        static_assert(sizeof...(idx) == N-1, "The number of indices must match the dimension N.");
+        static_assert((std::is_integral>::value && ...), "All indices must be integers");
+
+        #ifndef NDEBUG
+        std::size_t ii = 0; // Changed from 1 to 0 to match zero-based indexing
+        ((assert(idx >= 0 && idx < _dims[ii++])), ...);
+        #endif
+
+        std::size_t index = first;
+        std::size_t i = 1;
+        ((index += idx * _strides[i++]), ...);
+        return index;
+    }
+
+    static std::array compute_strides(const std::array& dims) {
+        std::array s{};
+        s[0] = 1;
+        for (std::size_t i = 1; i < N; ++i) {
+            s[i] = s[i - 1] * dims[i - 1];
+        }
+        return s;
+    }
+
+    T* const _ptr;
+    const std::array _dims;
+    const std::array _strides;
 };
 
-} // namespace tiny
+} // namespace tiny
 
 // #include
 // using namespace tiny;
+
 // int main() {
-//     std::size_t n = 3;
-//     float a[27] = {};
+//     constexpr int n = 3;
+//     float a[n * n * n] = {};
 
-//     View av{&a[0], {n,n,n}};
-
-//     foo2(av);
+//     for (int i = 0; i < n * n * n; i++) {
+//         a[i] = static_cast(i);
+//     }
 
-//     View bv = av.view(all, -2, Range{0,3});
+//     View av{&a[0], {static_cast(n), static_cast(n), static_cast(n)}};
+
+//     View bv = av.view(all, -2, Range{0, 3});
 
-//     for (int j = 0; j < bv.dims(1); j++) {
-//         for (int i = 0; i < bv.dims(0); i++) {
-//             std::printf("%f ", bv(i,j));
+//     for (std::size_t j = 0; j < bv.dims(1); j++) {
+//         for (std::size_t i = 0; i < bv.dims(0); i++) {
+//             std::printf("%f ", bv(i, j));
 //         }
 //         std::printf("\n");
 //     }
 
-//     assert(bv() - av() == 3);
 //     assert(bv.dims(0) == 3);
 //     assert(bv.dims(1) == 3);
 //     assert(bv(0,0) == 3.f);
 //     assert(bv(2,1) == 14.f);
 //     assert(bv(2,2) == 23.f);
-//     // for (int i = 0; i < 27; i++) {
-//     //     std::printf("%f ", a[i]);
-//     // }
 
 //     return 0;
 // }
\ No newline at end of file
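A note on the indexing semantics changed above: an integer argument to view() is now normalized as dim + idx when negative (so -1 selects the last element), while a negative Range bound keeps the dim + bound + 1 mapping, which is why all = Range{0, -1} still spans the whole axis and why the upper-bound assert now allows last == dim. The following stand-alone sketch mirrors those two normalization rules without using tiny::View; the helper names are made up for the illustration.

#include <cassert>
#include <cstddef>

// Hypothetical helpers mirroring the rules in the patch above.
// normalize_index: integer index, zero-based; -1 maps to the last element.
std::size_t normalize_index(long long idx, std::size_t dim) {
    return static_cast<std::size_t>(idx < 0 ? static_cast<long long>(dim) + idx : idx);
}
// normalize_bound: exclusive upper bound of a Range; -1 maps to dim itself.
std::size_t normalize_bound(long long bound, std::size_t dim) {
    return static_cast<std::size_t>(bound < 0 ? static_cast<long long>(dim) + bound + 1 : bound);
}

int main() {
    assert(normalize_index(-1, 8) == 7);  // new rule: -1 is the last valid index
    assert(normalize_index(-2, 8) == 6);  // the -2 used in the commented example above
    assert(normalize_bound(-1, 8) == 8);  // Range{0, -1} covers [0, 8), i.e. the full axis
    return 0;
}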
diff --git a/tests/uvlm_2dof.cpp b/tests/uvlm_2dof.cpp
new file mode 100644
index 0000000..dd9fe84
--- /dev/null
+++ b/tests/uvlm_2dof.cpp
@@ -0,0 +1,107 @@
+#include "vlm.hpp"
+#include "vlm_utils.hpp"
+#include "vlm_kinematics.hpp"
+
+#include "tinycombination.hpp"
+
+using namespace vlm;
+
+class NewmarkBeta {
+  public:
+    NewmarkBeta(std::unique_ptr memory, const f32 beta = 0.25f, const f32 gamma = 0.5f) : m_memory(std::move(memory)), m_beta(beta), m_gamma(gamma) {};
+    ~NewmarkBeta() = default;
+
+    void run(View>& M, View>& C, View>& K, View>& F, View>& u0, View>& v0, View>& t);
+
+  private:
+    std::unique_ptr m_blas;
+    std::unique_ptr m_memory;
+    const f32 m_beta;
+    const f32 m_gamma;
+
+    Buffer> K_eff{*m_memory}; // effective stiffness
+    Buffer> a0{*m_memory}; // initial acceleration
+    Buffer> du{*m_memory}; // incremental displacement
+    Buffer> factor{*m_memory}; // intermediary vector
+    Buffer> u{*m_memory}; // dof x tsteps position history
+    Buffer> v{*m_memory}; // dof x tsteps velocity history
+    Buffer> a{*m_memory}; // dof x tsteps acceleration history
+};
+
+void NewmarkBeta::run(View>& M, View>& C, View>& K, View>& F, View>& u0, View>& v0, View>& t) {
+    assert(M.layout.shape(0) == C.layout.shape(0));
+    assert(M.layout.shape(1) == C.layout.shape(1));
+    assert(C.layout.shape(0) == K.layout.shape(0));
+    assert(C.layout.shape(1) == K.layout.shape(1));
+    assert(K.layout.shape(0) == F.layout.shape(0));
+    assert(u0.layout.shape(0) == F.layout.shape(0));
+    assert(v0.layout.shape(0) == F.layout.shape(0));
+
+    Tensor<2> time_series{{F.layout.shape(0), t.layout.shape(0)}}; // dofs x timesteps
+
+    // TODO: preallocate a number of timesteps and only resize when necessary
+    u.dealloc();
+    v.dealloc();
+    a.dealloc();
+
+    u.alloc(time_series);
+    v.alloc(time_series);
+    a.alloc(time_series);
+
+    m_memory->copy(MemoryTransfer::DeviceToDevice, u.d_view().ptr, u0.ptr, u0.size_bytes());
+    m_memory->copy(MemoryTransfer::DeviceToDevice, v.d_view().ptr, v0.ptr, v0.size_bytes());
+
+    // a[:,0] = np.linalg.solve(M, F[0] - C @ v0 - K @ x0)
+    View> a0_col0 = a.d_view().layout.slice(a.d_view().ptr, all, 0);
+    m_memory->copy(MemoryTransfer::DeviceToDevice, a0_col0.ptr, F.ptr, F.size_bytes());
+    m_blas->gemv(-1.0f, C, v0, 1.0f, a0_col0);
+    m_blas->gemv(-1.0f, K, u0, 1.0f, a0_col0);
+}
+
+int main() {
+    const u64 ni = 20;
+    const u64 nj = 5;
+    // vlm::Executor::instance(1);
+    const std::vector meshes = {"../../../../mesh/infinite_rectangular_" + std::to_string(ni) + "x" + std::to_string(nj) + ".x"};
+    const std::vector backends = {"cpu"};
+
+    auto solvers = tiny::make_combination(meshes, backends);
+
+    // Geometry
+    const f32 b = 0.5f; // half chord
+
+    // Define simulation length
+    const f32 cycles = 10.0f;
+    const f32 u_inf = 1.0f; // freestream velocity
+    const f32 amplitude = 0.1f; // amplitude of the wing motion
+    const f32 k = 0.5; // reduced frequency
+    const f32 omega = k * 2.0f * u_inf / (2*b);
+    const f32 t_final = cycles * 2.0f * PI_f / omega; // 4 periods
+    //const f32 t_final = 5.0f;
+
+    Kinematics kinematics{};
+
+    const f32 initial_angle = 0.0f;
+
+    const auto initial_pose = rotation_matrix(
+        linalg::alias::float3{0.0f, 0.0f, 0.0f}, // take into account quarter chord panel offset
+        linalg::alias::float3{0.0f, 1.0f, 0.0f},
+        to_radians(initial_angle)
+    );
+
+    // Sudden acceleration
+    const f32 alpha = to_radians(5.0f);
+    kinematics.add([=](const fwd::Float& t) {
+        return translation_matrix({
+            -u_inf*std::cos(alpha)*t,
+            0.0f,
+            -u_inf*std::sin(alpha)*t
+        });
+    });
+
+    for (const auto& [mesh_name, backend_name] : solvers) {
+        UVLM simulation{backend_name, {mesh_name}};
+        simulation.run({kinematics}, {initial_pose}, t_final);
+    }
+    return 0;
+}
\ No newline at end of file
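NewmarkBeta::run above allocates the displacement/velocity/acceleration histories and assembles the right-hand side F0 - C*v0 - K*u0 for the initial acceleration; the time-stepping loop itself is not part of this diff. As a reference for what the scheme computes once that loop exists, here is a self-contained single-degree-of-freedom sketch of the textbook Newmark-beta update using the same defaults (beta = 0.25, gamma = 0.5). All names are illustrative and none of the project's types or BLAS calls are used.

#include <cstdio>

// Single-DOF Newmark-beta integration of m*a + c*v + k*u = f(t) (textbook form).
int main() {
    const float m = 1.0f, c = 0.1f, k = 4.0f;  // mass, damping, stiffness
    const float beta = 0.25f, gamma = 0.5f;    // average-acceleration variant
    const float dt = 0.01f;
    const int steps = 1000;

    auto f = [](float /*t*/) { return 1.0f; }; // placeholder constant load

    float u = 0.0f, v = 0.0f;
    float a = (f(0.0f) - c * v - k * u) / m;   // initial acceleration, as in run()

    // Effective stiffness is constant for a linear system.
    const float k_eff = k + gamma / (beta * dt) * c + 1.0f / (beta * dt * dt) * m;

    for (int n = 0; n < steps; ++n) {
        // Right-hand side built from the previous state.
        const float rhs = f((n + 1) * dt)
            + m * (u / (beta * dt * dt) + v / (beta * dt) + (0.5f / beta - 1.0f) * a)
            + c * (gamma / (beta * dt) * u + (gamma / beta - 1.0f) * v
                   + dt * (gamma / (2.0f * beta) - 1.0f) * a);
        const float u_new = rhs / k_eff;                      // displacement update
        const float a_new = (u_new - u) / (beta * dt * dt)    // acceleration update
                          - v / (beta * dt) - (0.5f / beta - 1.0f) * a;
        const float v_new = v + dt * ((1.0f - gamma) * a + gamma * a_new);
        u = u_new; v = v_new; a = a_new;
    }
    std::printf("u(10s) = %f, static solution f/k = %f\n", u, 1.0f / k);
    return 0;
}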
diff --git a/vlm/backends/cpu/include/vlm_backend_cpu.hpp b/vlm/backends/cpu/include/vlm_backend_cpu.hpp
index d944d92..791c380 100644
--- a/vlm/backends/cpu/include/vlm_backend_cpu.hpp
+++ b/vlm/backends/cpu/include/vlm_backend_cpu.hpp
@@ -35,4 +35,13 @@ class BackendCPU final : public Backend {
     f32 mesh_area(const View& areas) override;
 };
 
+class BLAS_CPU final : public BLAS {
+  public:
+    BLAS_CPU() = default;
+    ~BLAS_CPU() = default;
+
+    void gemv(const f32 alpha, const View>& A, const View>& x, const f32 beta, View>& y, Trans trans = Trans::No) override;
+    void gemm(const f32 alpha, const View>& A, const View>& B, const f32 beta, View>& C, Trans trans_a = Trans::No, Trans trans_b = Trans::No) override;
+};
+
 } // namespace vlm
\ No newline at end of file
diff --git a/vlm/backends/cpu/src/vlm_backend_cpu.cpp b/vlm/backends/cpu/src/vlm_backend_cpu.cpp
index 2acb5f3..e6dc3a5 100644
--- a/vlm/backends/cpu/src/vlm_backend_cpu.cpp
+++ b/vlm/backends/cpu/src/vlm_backend_cpu.cpp
@@ -39,6 +39,62 @@ void print_cpu_info() {
     std::printf("DEVICE: %s (%d threads)\n", cpuid.full_name.c_str(), std::thread::hardware_concurrency());
 }
 
+CBLAS_TRANSPOSE trans_to_cblas(BLAS_CPU::Trans trans) {
+    switch (trans) {
+        case BLAS_CPU::Trans::No: return CblasNoTrans;
+        case BLAS_CPU::Trans::Yes: return CblasTrans;
+    }
+}
+
+void BLAS_CPU::gemv(const f32 alpha, const View>& A, const View>& x, const f32 beta, View>& y, Trans trans) {
+    assert(A.layout.stride(0) == 1);
+
+    i32 m = (trans == BLAS_CPU::Trans::No) ? A.layout.shape(0) : A.layout.shape(1);
+    i32 n = (trans == BLAS_CPU::Trans::No) ? A.layout.shape(1) : A.layout.shape(0);
+
+    cblas_sgemv(
+        CblasColMajor,
+        trans_to_cblas(trans),
+        m,
+        n,
+        alpha,
+        A.ptr,
+        static_cast(A.layout.stride(1)),
+        x.ptr,
+        static_cast(x.layout.stride(0)),
+        beta,
+        y.ptr,
+        static_cast(y.layout.stride(0))
+    );
+}
+
+void BLAS_CPU::gemm(const f32 alpha, const View>& A, const View>& B, const f32 beta, View>& C, Trans trans_a, Trans trans_b) {
+    assert(A.layout.stride(0) == 1);
+    assert(B.layout.stride(0) == 1);
+    assert(C.layout.stride(0) == 1);
+
+    i32 m = (trans_a == BLAS_CPU::Trans::No) ? A.layout.shape(0) : A.layout.shape(1);
+    i32 n = (trans_b == BLAS_CPU::Trans::No) ? B.layout.shape(1) : B.layout.shape(0);
+    i32 k = (trans_a == BLAS_CPU::Trans::No) ? A.layout.shape(1) : A.layout.shape(0);
+
+    cblas_sgemm(
+        CblasColMajor,
+        trans_to_cblas(trans_a),
+        trans_to_cblas(trans_b),
+        m,
+        n,
+        k,
+        alpha,
+        A.ptr,
+        static_cast(A.layout.stride(1)),
+        B.ptr,
+        static_cast(B.layout.stride(1)),
+        beta,
+        C.ptr,
+        static_cast(C.layout.stride(1))
+    );
+}
+
 BackendCPU::BackendCPU() : Backend(std::make_unique()) {
     print_cpu_info();
 }
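Both wrappers above assume a unit row stride (column-major storage) and pass stride(1) as the CBLAS leading dimension. As a stand-alone reminder of how cblas_sgemv interprets those arguments (any CBLAS implementation such as OpenBLAS; this snippet is not code from the PR):

#include <cblas.h>
#include <cstdio>

int main() {
    // y = alpha * A * x + beta * y, with A stored column-major.
    // A is 2x3, so the leading dimension (distance between columns) is 2.
    const float A[6] = {1.0f, 4.0f,   // column 0
                        2.0f, 5.0f,   // column 1
                        3.0f, 6.0f};  // column 2
    const float x[3] = {1.0f, 1.0f, 1.0f};
    float y[2] = {0.0f, 0.0f};

    cblas_sgemv(CblasColMajor, CblasNoTrans,
                2, 3,           // m rows, n cols of A
                1.0f, A, 2,     // alpha, A, lda
                x, 1,           // x, incx
                0.0f, y, 1);    // beta, y, incy

    std::printf("y = [%f, %f]\n", y[0], y[1]); // expect [6, 15]
    return 0;
}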
diff --git a/vlm/backends/cuda/include/vlm_backend_cuda.hpp b/vlm/backends/cuda/include/vlm_backend_cuda.hpp
index 14d0636..23f0f8b 100644
--- a/vlm/backends/cuda/include/vlm_backend_cuda.hpp
+++ b/vlm/backends/cuda/include/vlm_backend_cuda.hpp
@@ -36,4 +36,13 @@ class BackendCUDA final : public Backend {
     f32 mesh_area(const View& areas) override;
 };
 
+class BLAS_CUDA final : public BLAS {
+  public:
+    BLAS_CUDA() = default;
+    ~BLAS_CUDA() = default;
+
+    void gemv(const f32 alpha, const View>& A, const View>& x, const f32 beta, View>& y, Trans trans = Trans::No) override;
+    void gemm(const f32 alpha, const View>& A, const View>& B, const f32 beta, View>& C, Trans trans_a = Trans::No, Trans trans_b = Trans::No) override;
+};
+
 } // namespace vlm
\ No newline at end of file
diff --git a/vlm/include/vlm_backend.hpp b/vlm/include/vlm_backend.hpp
index 98879d3..26483f7 100644
--- a/vlm/include/vlm_backend.hpp
+++ b/vlm/include/vlm_backend.hpp
@@ -50,6 +50,16 @@ class Backend {
     virtual f32 mesh_area(const View& areas) = 0;
 };
 
+class BLAS {
+  public:
+    enum class Trans { Yes, No };
+    BLAS() = default;
+    virtual ~BLAS() = default;
+
+    virtual void gemv(const f32 alpha, const View>& A, const View>& x, const f32 beta, View>& y, Trans order = Trans::No) = 0;
+    virtual void gemm(const f32 alpha, const View>& A, const View>& B, const f32 beta, View>& C, Trans trans_a = Trans::No, Trans trans_b = Trans::No) = 0;
+};
+
 std::unique_ptr create_backend(const std::string& backend_name);
 std::vector get_available_backends();
 
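The interface above follows standard BLAS semantics: gemv computes y = alpha*op(A)*x + beta*y and gemm computes C = alpha*op(A)*op(B) + beta*C; that is what lets the Newmark test accumulate F - C*v0 - K*u0 with two gemv calls (alpha = -1, beta = 1). Below is a naive column-major reference gemv, given only as a model of the contract every backend implementation must satisfy; the helper is not part of the diff.

#include <cassert>
#include <cstddef>

// Reference y = alpha * A * x + beta * y for a column-major m x n matrix A,
// where lda is the element distance between consecutive columns.
void gemv_colmajor_ref(float alpha, const float* A, std::size_t lda,
                       const float* x, float beta, float* y,
                       std::size_t m, std::size_t n) {
    for (std::size_t i = 0; i < m; ++i) {
        float acc = 0.0f;
        for (std::size_t j = 0; j < n; ++j) acc += A[i + j * lda] * x[j];
        y[i] = alpha * acc + beta * y[i];
    }
}

int main() {
    const float A[4] = {1.0f, 3.0f, 2.0f, 4.0f}; // [[1, 2], [3, 4]] column-major
    const float x[2] = {1.0f, 2.0f};
    float y[2] = {1.0f, 1.0f};
    gemv_colmajor_ref(2.0f, A, 2, x, 1.0f, y, 2, 2); // y = 2*A*x + y
    assert(y[0] == 11.0f && y[1] == 23.0f);
    return 0;
}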
diff --git a/vlm/include/vlm_memory.hpp b/vlm/include/vlm_memory.hpp
index 64b0882..eb883ad 100644
--- a/vlm/include/vlm_memory.hpp
+++ b/vlm/include/vlm_memory.hpp
@@ -4,6 +4,7 @@
 #include
 #include
 #include
+#include
 
 namespace vlm {
 
@@ -243,12 +244,29 @@ class Matrix {
         uint64_t _stride = 0; // leading dimension
 };
 
+using Range = std::array;
+
+constexpr Range all{0,-1};
+
+template
+struct CountRanges;
+
+template<>
+struct CountRanges<> {
+    static constexpr std::size_t value = 0;
+};
+
+template
+struct CountRanges {
+    static constexpr std::size_t value = std::is_same>::value + CountRanges::value;
+};
+
 template
 class Tensor {
     public:
-        Tensor() = default;
-        Tensor(const std::array& shape, const std::array& strides) : _shape(shape), _strides(strides) {}
-        Tensor(const std::array& shape) : _shape(shape) { default_strides(); }
+        constexpr Tensor() = default;
+        constexpr Tensor(const std::array& shape, const std::array& strides) : _shape(shape), _strides(strides) {}
+        constexpr Tensor(const std::array& shape) : _shape(shape) { default_strides(); }
 
         std::size_t size() const {
             std::size_t size = 1;
             for (std::size_t i = 0; i < Dim; i++) size *= _shape[i];
@@ -257,7 +275,40 @@ class Tensor {
 
         uint64_t stride(int dim) const {return _strides[dim];}
         uint64_t shape(int dim) const {return _shape[dim];}
-
+
+        template
+        inline constexpr auto slice(T* ptr, Args... args) {
+            constexpr uint64_t M = CountRanges::value;
+            static_assert(sizeof...(args) == Dim, "The number of indices must match the dimension N.");
+            static_assert(M <= Dim, "Too many ranges provided compared to the view's dimensionality");
+
+            T* newPtr = ptr;
+            std::array newDims{};
+            std::array newStrides{};
+            uint64_t newDimIndex = 0;
+
+            uint64_t argIndex = 0;
+            ([&](auto& arg) {
+                if constexpr (std::is_same, Range>::value) {
+                    uint64_t first = arg[0]; // Removed 'constexpr'
+                    uint64_t last = (arg[1] < 0) ? _shape[argIndex] + arg[1] + 1 : arg[1];
+                    assert((first >= 0) && (first < _shape[argIndex]));
+                    assert((last >= 0) && (last <= _shape[argIndex])); // Changed '<' to '<=' for upper bound
+                    assert(last - first > 0);
+                    newPtr += first * _strides[argIndex];
+                    newDims[newDimIndex] = last - first;
+                    newStrides[newDimIndex] = _strides[argIndex];
+                    newDimIndex++;
+                } else if constexpr (std::is_integral>::value) {
+                    uint64_t real_arg = (arg < 0) ? _shape[argIndex] + arg : arg; // Removed '+1' as indexing starts from 0
+                    assert((real_arg >= 0) && (real_arg < _shape[argIndex]));
+                    newPtr += real_arg * _strides[argIndex];
+                }
+                argIndex++;
+            }(args), ...);
+
+            return View>(newPtr, Tensor{newDims, newStrides});
+        }
 
     private:
         constexpr void default_strides() {
             _strides[0] = 1;
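Tensor::slice mirrors tiny::View::view above: each Range argument keeps an axis (its extent and stride survive in the result), and each integer argument drops an axis by advancing the base pointer by index * stride. For the column slice used in the Newmark test, slice(ptr, all, 0) on a column-major dofs x timesteps array, this boils down to plain pointer arithmetic, illustrated here without the vlm types:

#include <cassert>
#include <cstddef>

int main() {
    // 3 x 4 column-major array: stride(0) = 1, stride(1) = 3.
    const std::size_t rows = 3, cols = 4;
    float data[rows * cols];
    for (std::size_t j = 0; j < cols; ++j)
        for (std::size_t i = 0; i < rows; ++i)
            data[i + j * rows] = static_cast<float>(10 * j + i);

    // Equivalent of slice(ptr, all, 2): keep axis 0, fix axis 1 at index 2.
    float* col2 = data + 2 * rows;      // base pointer advanced by index * stride(1)
    const std::size_t col2_len = rows;  // surviving axis keeps its extent (and stride 1)

    for (std::size_t i = 0; i < col2_len; ++i)
        assert(col2[i] == static_cast<float>(20 + i));
    return 0;
}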
diff --git a/vlm/include/vlm_types.hpp b/vlm/include/vlm_types.hpp
index d62231f..f257de2 100644
--- a/vlm/include/vlm_types.hpp
+++ b/vlm/include/vlm_types.hpp
@@ -1,5 +1,6 @@
 #pragma once
 
+// TODO: remove all unnecessary includes, im currently using this header as a pch (it's not)
 #include
 #include
 #include
@@ -9,7 +10,7 @@
 #include
 #include
 
-#include "linalg.h"
+#include "linalg.h" // TODO: remove
 
 namespace vlm {