Some work on MatrixPowersKernel.

ypodlesov · May 19, 2024 · e185165 · e185165
1 parent 2e1f4e9
commit e185165
Show file tree

Hide file tree

Showing 6 changed files with 145 additions and 28 deletions.
diff --git a/basic_la/common_container.h b/basic_la/common_container.h
@@ -2,6 +2,7 @@
 #include "helpers.h"
 #include <cassert>
 #include <cstddef>
+#include <cstdint>
 #include <cstdlib>
 #include <ctime>
 #include <type_traits>
@@ -14,7 +15,7 @@ struct CommonContainer {
 
     CommonContainer(int64_t mem_size)
         : mem_size_{mem_size}
-        , data_{new T[mem_size_]}
+        , data_{new T[mem_size]}
     {
     }
 
@@ -37,6 +38,12 @@ struct CommonContainer {
         other.mem_size_ = 0;
     }
 
+    CommonContainer(T* data, int64_t mem_size) 
+        : mem_size_{mem_size}
+        , data_{data}
+    {
+    }
+
     CommonContainer& operator =(const CommonContainer& other) {
         if (data_ && Hold) {
             delete[] data_;

diff --git a/basic_la/common_matrix.h b/basic_la/common_matrix.h
@@ -3,6 +3,7 @@
 #include "helpers.h"
 #include "vector.h"
 #include <cassert>
+#include <cstdint>
 #include <utility>
 
 template <typename SpecMatrix, typename T, bool Hold = true>
@@ -47,6 +48,13 @@ struct CommonMatrix: public CommonContainer<CommonMatrix<SpecMatrix, T, Hold>, T
         return *this;
     }
 
+    CommonMatrix(T* data, const int64_t mem_size, const int64_t row_cnt, const int64_t col_cnt)
+        : Base{data, mem_size}
+        , row_cnt_{row_cnt}
+        , col_cnt_{col_cnt}
+    {
+    }
+
     inline T Get(int64_t row, int64_t col) const {
         T result;
         static_cast<const SpecMatrix*>(this)->GetImpl(row, col, result);

diff --git a/basic_la/sparse_matrix.h b/basic_la/sparse_matrix.h
@@ -1,29 +1,34 @@
 #pragma once
 #include "common_matrix.h"
+#include <cstdint>
 #include <cstring>
 #include <iostream>
 #include <random>
 #include <set>
 #include <utility>
 
-template <typename T>
-struct SparseMatrix: public CommonMatrix<SparseMatrix<double>, double> {
-    using Base = CommonMatrix<SparseMatrix<double>, double>;
+template <typename T, bool Hold = true>
+struct SparseMatrix: public CommonMatrix<SparseMatrix<T, Hold>, T, Hold> {
+    using Base = CommonMatrix<SparseMatrix<T, Hold>, T, Hold>;
+    using Base::mem_size_;
+    using Base::data_;
+    using Base::row_cnt_;
+    using Base::col_cnt_;
 
     SparseMatrix() = default;
 
     SparseMatrix(int64_t row_cnt, int64_t col_cnt, int64_t non_zero)
         : Base(row_cnt, col_cnt, std::min(non_zero, row_cnt * col_cnt))
-        , j_a_{new int64_t[mem_size_]}
+        , j_a_{new int64_t[std::min(non_zero, row_cnt * col_cnt)]}
         , i_a_{new int64_t[row_cnt + 1]}
     {
         std::memset(i_a_, 0, (row_cnt_ + 1) * sizeof(decltype(row_cnt)));
     }
 
     SparseMatrix(const SparseMatrix& other)
         : Base(other)
-        , j_a_{new int64_t[mem_size_]}
-        , i_a_{new int64_t[row_cnt_ + 1]}
+        , j_a_{new int64_t[other.mem_size]}
+        , i_a_{new int64_t[other.row_cnt + 1]}
     {
         std::memcpy(j_a_, other.j_a_, mem_size_ * sizeof(decltype(col_cnt_)));
         std::memcpy(i_a_, other.i_a_, (row_cnt_ + 1) * sizeof(decltype(row_cnt_)));
@@ -38,6 +43,17 @@ struct SparseMatrix: public CommonMatrix<SparseMatrix<double>, double> {
         other.i_a_ = nullptr;
     }
 
+    SparseMatrix(const SparseMatrix<T>& other, const int64_t row_start, const int64_t row_end)
+        : SparseMatrix{row_end - row_start, other.col_cnt_, std::max<int64_t>(other.i_a_[row_end] - other.i_a_[row_start], 0)}
+    {
+        std::memcpy(data_, &other.data_[other.i_a_[row_start]], mem_size_ * sizeof(T));
+        std::memcpy(j_a_, &other.j_a_[other.i_a_[row_start]], mem_size_ * sizeof(int64_t));
+        for (int64_t i = row_start; i < row_end; ++i) {
+            i_a_[i] = std::max<int64_t>(other.i_a_[i] - other.i_a_[row_start], 0);
+        }
+        i_a_[row_end - row_start] = row_cnt_ * col_cnt_;
+    }
+
     SparseMatrix& operator =(const SparseMatrix& other) {
         Base::operator =(other);
         j_a_ = new int64_t[mem_size_];
@@ -125,16 +141,15 @@ struct SparseMatrix: public CommonMatrix<SparseMatrix<double>, double> {
         }
     }
 
-
     ~SparseMatrix() {
-        if (j_a_) {
+        if (j_a_ && Hold) {
             delete[] j_a_;
-            j_a_ = nullptr;
         }
-        if (i_a_) {
+        j_a_ = nullptr;
+        if (i_a_ && Hold) {
             delete[] i_a_;
-            i_a_ = nullptr;
         }
+        i_a_ = nullptr;
     }
 
     int64_t* j_a_{nullptr};

diff --git a/matrix_powers_mv/matrix_powers_mv.cpp b/matrix_powers_mv/matrix_powers_mv.cpp
@@ -1,10 +1,15 @@
 #include "matrix_powers_mv.h"
+#include "helpers.h"
+#include "sparse_matrix.h"
+#include <cassert>
 #include <cstdint>
 #include <cstring>
+#include <thread>
 
 bool ReorderMatrix(SparseMatrix<double>& sp_matrix) {
     SparseMatrix<double> a_no_diag;
     sp_matrix.RemoveDiag(a_no_diag);
+
     idx_t* perm = new idx_t[a_no_diag.row_cnt_];
     idx_t* iperm = new idx_t[a_no_diag.row_cnt_];
     idx_t options[METIS_NOPTIONS];
@@ -20,7 +25,7 @@ bool ReorderMatrix(SparseMatrix<double>& sp_matrix) {
         , perm
         , iperm
     );
-    
+
     if (result != METIS_OK) {
         return false;
     }
@@ -33,9 +38,65 @@ bool ReorderMatrix(SparseMatrix<double>& sp_matrix) {
     return true;
 }
 
-bool MatrixPowersMV(SparseMatrix<double>& sp_matrix, const Vector<double>& /* x */, const std::vector<Vector<double>*>& /* res */) {
-    if (!ReorderMatrix(sp_matrix)) {
-        return false;
+bool MatrixPowersMV(SparseMatrix<double>& sp_matrix, const Vector<double>& x, std::vector<Vector<double>>& res) {
+    // if (!ReorderMatrix(sp_matrix)) {
+    //     return false;
+    // }
+    Vector<double> prev_x = x;
+    for (auto cur_x : res) {
+        assert(prev_x.mem_size_ == x.mem_size_);
+        assert(cur_x.mem_size_ == x.mem_size_);
+        NHelpers::Nullify(cur_x.data_, cur_x.mem_size_);
+
+        int64_t cores_num = std::thread::hardware_concurrency();
+        assert(sp_matrix.row_cnt_ % cores_num == 0);
+        const int64_t row_step = sp_matrix.row_cnt_ / cores_num;
+
+        {
+            std::vector<std::pair<SparseMatrix<double>, Vector<double>>> matrix_blocks(cores_num);
+            std::vector<std::thread> threads;
+            {
+                int64_t cur_row_start = 0;
+                for (auto& [matrix_block, vector_block] : matrix_blocks) {
+                    if (sp_matrix.i_a_[cur_row_start + row_step] - sp_matrix.i_a_[cur_row_start] <= 0) {
+                        continue;
+                    }
+                    matrix_block = SparseMatrix<double>(sp_matrix, cur_row_start, cur_row_start + row_step);
+                    for (int64_t i = 0; i < matrix_block.mem_size_; ++i) {
+                        assert(NHelpers::RoughEq(matrix_block.data_[i], sp_matrix.data_[sp_matrix.i_a_[cur_row_start] + i], 1e-6));
+                    }
+                    [[maybe_unused]] double* tmp_ptr = new double[512];
+                    vector_block = Vector<double>(row_step);
+                    const int64_t local_row_start = cur_row_start;
+
+                    // async compute corresponding component
+                    threads.emplace_back([row_step](
+                            const SparseMatrix<double>& matrix_block
+                            , Vector<double>& vector_block
+                            , const Vector<double>& cur_x
+                            , const Vector<double>& prev_x
+                            , const int64_t local_row_start) {
+                        matrix_block.VecMult(prev_x, vector_block);
+                        for (int64_t i = local_row_start; i < local_row_start + row_step; ++i) {
+                            cur_x.data_[i] = vector_block.data_[i - local_row_start];
+                        }
+                    }, std::ref(matrix_block)
+                    , std::ref(vector_block)
+                    , cur_x
+                    , prev_x
+                    , local_row_start);
+                    cur_row_start += row_step;
+                }
+                for (auto&& thread : threads) {
+                    thread.join();
+                }
+                Vector<double> cmp_res(x.mem_size_);
+                sp_matrix.VecMult(prev_x, cmp_res);
+                cmp_res.PlusAX(cur_x, -1);
+                std::cout << cmp_res.Norm2() << std::endl;
+            }
+        }
+        prev_x = cur_x;
     }
     return true;
 }
diff --git a/matrix_powers_mv/matrix_powers_mv.h b/matrix_powers_mv/matrix_powers_mv.h
@@ -5,4 +5,4 @@
 #include <vector.h>
 
 bool ReorderMatrix(SparseMatrix<double>& sp_matrix);
-bool MatrixPowersMV(SparseMatrix<double>& sp_matrix, const Vector<double>& x, const std::vector<Vector<double>*>& res);
+bool MatrixPowersMV(SparseMatrix<double>& sp_matrix, const Vector<double>& x, std::vector<Vector<double>>& res);
diff --git a/matrix_powers_mv/test.cpp b/matrix_powers_mv/test.cpp
@@ -1,3 +1,4 @@
+#include "helpers.h"
 #include "matrix_powers_mv.h"
 #include "sparse_matrix.h"
 #include <filesystem>
@@ -9,7 +10,7 @@
 #include <catch2/generators/catch_generators_all.hpp>
 #include <cstdint>
 
-static void Test(const uint32_t n) {
+static void Test(const int64_t n) {
     SparseMatrix<double> a_no_diag;
     SparseMatrix<double> a;
     {
@@ -22,20 +23,45 @@ static void Test(const uint32_t n) {
         fstream.close();
         REQUIRE(a.data_);
     }
-    std::vector<Vector<double>*> res;
     Vector<double> x(n);
-    NHelpers::GenRandomVector(x, n, true);
-
-    REQUIRE(MatrixPowersMV(a, x, res));
-}
+    for (int64_t i = 0; i < n; ++i) {
+        x.data_[i] = i;
+    }
+    // NHelpers::GenRandomVector(x, n, true);
+    int64_t s = std::min<int64_t>(n, 1 << 10);
+    std::vector<Vector<double>> result(s);
+    for (auto& single_res : result) {
+        single_res = Vector<double>(n);
+    }
+    REQUIRE(MatrixPowersMV(a, x, result));
+    REQUIRE(static_cast<int64_t>(result.size()) == s);
 
-TEST_CASE("Size 128") {
-    Test(128);
+    { // check results
+        // constexpr double eps = 1e-6;
+        Vector<double> last_res(x);
+        for (int64_t i = 0; i < s; ++i) {
+            Vector<double> local_res(n);
+            a.VecMult(last_res, local_res);
+            // std::cout << local_res.Norm2() << std::endl;
+            local_res.PlusAX(result[i], -1);
+            // std::cout << local_res.Norm2() << std::endl;
+            // REQUIRE(NHelpers::RoughEq(local_res.Norm2(), 0.0, eps));
+            last_res = local_res;
+        }
+    }
 }
 
-TEST_CASE("Size 256") {
-    Test(256);
-}
+// TEST_CASE("Size 4") {
+//     Test(4);
+// }
+
+// TEST_CASE("Size 128") {
+//     Test(128);
+// }
+
+// TEST_CASE("Size 256") {
+//     Test(256);
+// }
 
 TEST_CASE("Size 512") {
     Test(512);