diff --git a/basic_la/matrix_powers_mv.cpp b/basic_la/matrix_powers_mv.cpp
index 747f604..ea8b018 100644
--- a/basic_la/matrix_powers_mv.cpp
+++ b/basic_la/matrix_powers_mv.cpp
@@ -39,12 +39,80 @@ bool ReorderMatrix(const SparseMatrix<double>& sp_matrix, SparseMatrix<double>&
     return true;
 }
 
+// bool MatrixPowersMV(const SparseMatrix<double>& sp_matrix, const Vector<double>& x, std::vector<Vector<double>>& res) {
+//     res.front() = x;
+
+//     const int64_t cores_num = std::min<int64_t>(x.mem_size_, std::thread::hardware_concurrency());
+//     const int64_t row_step = sp_matrix.row_cnt_ / cores_num;
+//     std::vector<std::pair<SparseMatrix<double>, Vector<double>>> matrix_blocks(cores_num);
+//     {
+//         int64_t cur_row_start = 0;
+//         for (auto& [matrix_block, vector_block] : matrix_blocks) {
+//             if (sp_matrix.i_a_[cur_row_start + row_step] - sp_matrix.i_a_[cur_row_start] <= 0) {
+//                 continue;
+//             }
+//             matrix_block = SparseMatrix<double>(sp_matrix, cur_row_start, cur_row_start + row_step);
+//             vector_block = Vector<double>(row_step);
+//         }
+//     }
+
+//     for (auto cur_x_iter = std::next(res.begin()); cur_x_iter != res.end(); ++cur_x_iter) {
+//         const auto& prev_x = *std::prev(cur_x_iter);
+//         auto& cur_x = *cur_x_iter;
+//         assert(prev_x.mem_size_ == x.mem_size_);
+//         assert(cur_x.mem_size_ == x.mem_size_);
+         
+//         std::vector<std::thread> threads;
+//         threads.reserve(cores_num);
+//         int64_t cur_row_start = 0;
+//         for (auto& [matrix_block, vector_block] : matrix_blocks) {
+//             const int64_t local_row_start = cur_row_start;
+
+//             // async compute corresponding component
+//             threads.emplace_back([row_step](
+//                     const SparseMatrix<double>& matrix_block
+//                     , Vector<double>& vector_block
+//                     , const Vector<double>& cur_x
+//                     , const Vector<double>& prev_x
+//                     , const int64_t local_row_start) {
+//                 matrix_block.VecMult(prev_x, vector_block);
+//                 for (int64_t i = local_row_start; i < local_row_start + row_step; ++i) {
+//                     cur_x.data_[i] = vector_block.data_[i - local_row_start];
+//                 }
+//             }, std::ref(matrix_block)
+//             , std::ref(vector_block)
+//             , std::ref(cur_x)
+//             , std::ref(prev_x)
+//             , local_row_start);
+
+//             cur_row_start += row_step;
+//         }
+//         for (auto&& thread : threads) {
+//             thread.join();
+//         }
+//     }
+//     return true;
+// }
+
 bool MatrixPowersMV(const SparseMatrix<double>& sp_matrix, const Vector<double>& x, std::vector<Vector<double>>& res) {
-    // SparseMatrix<double> reordered;
-    // if (!ReorderMatrix(sp_matrix, reordered)) {
-    //     return false;
-    // }
     res.front() = x;
+
+    const int64_t cores_num = std::min<int64_t>(x.mem_size_, std::thread::hardware_concurrency());
+    assert(sp_matrix.row_cnt_ % cores_num == 0);
+    const int64_t row_step = sp_matrix.row_cnt_ / cores_num;
+    std::vector<std::pair<SparseMatrix<double>, Vector<double>>> matrix_blocks(cores_num);
+    {
+        int64_t cur_row_start = 0;
+        for (auto& [matrix_block, vector_block] : matrix_blocks) {
+            if (sp_matrix.i_a_[cur_row_start + row_step] - sp_matrix.i_a_[cur_row_start] <= 0) {
+                continue;
+            }
+            vector_block = Vector<double>(row_step);
+            matrix_block = SparseMatrix<double>(sp_matrix, cur_row_start, cur_row_start + row_step);
+            cur_row_start += row_step;
+        }
+    }
+
     for (auto cur_x_iter = std::next(res.begin()); cur_x_iter != res.end(); ++cur_x_iter) {
         auto& prev_x = *std::prev(cur_x_iter);
         auto& cur_x = *cur_x_iter;
@@ -52,44 +120,19 @@ bool MatrixPowersMV(const SparseMatrix<double>& sp_matrix, const Vector<double>&
         assert(cur_x.mem_size_ == x.mem_size_);
         NHelpers::Nullify(cur_x.data_, cur_x.mem_size_);
         
-        int64_t cores_num = std::min<int64_t>(x.mem_size_, std::thread::hardware_concurrency());
-        assert(sp_matrix.row_cnt_ % cores_num == 0);
-        const int64_t row_step = sp_matrix.row_cnt_ / cores_num;
-
         {
-            std::vector<std::pair<SparseMatrix<double>, Vector<double>>> matrix_blocks(cores_num);
             std::vector<std::thread> threads;
             threads.reserve(cores_num);
             {
                 int64_t cur_row_start = 0;
                 for (auto& [matrix_block, vector_block] : matrix_blocks) {
-                    if (sp_matrix.i_a_[cur_row_start + row_step] - sp_matrix.i_a_[cur_row_start] <= 0) {
-                        continue;
-                    }
-                    matrix_block = SparseMatrix<double>(sp_matrix, cur_row_start, cur_row_start + row_step);
-                    for (int64_t i = 0; i < matrix_block.mem_size_; ++i) {
-                        assert(NHelpers::RoughEq(matrix_block.data_[i], sp_matrix.data_[sp_matrix.i_a_[cur_row_start] + i], 1e-6));
-                    }
-                    [[maybe_unused]] double* tmp_ptr = new double[512];
-                    vector_block = Vector<double>(row_step);
-                    const int64_t local_row_start = cur_row_start;
-
                     // async compute corresponding component
-                    threads.emplace_back([row_step](
-                            const SparseMatrix<double>& matrix_block
-                            , Vector<double>& vector_block
-                            , const Vector<double>& cur_x
-                            , const Vector<double>& prev_x
-                            , const int64_t local_row_start) {
+                    threads.emplace_back([&, cur_row_start, row_step]() {
                         matrix_block.VecMult(prev_x, vector_block);
-                        for (int64_t i = local_row_start; i < local_row_start + row_step; ++i) {
-                            cur_x.data_[i] = vector_block.data_[i - local_row_start];
+                        for (int64_t i = cur_row_start; i < cur_row_start + row_step; ++i) {
+                            cur_x.data_[i] = vector_block.data_[i - cur_row_start];
                         }
-                    }, std::ref(matrix_block)
-                    , std::ref(vector_block)
-                    , std::ref(cur_x)
-                    , std::ref(prev_x)
-                    , local_row_start);
+                    });
                     cur_row_start += row_step;
                 }
                 for (auto&& thread : threads) {
diff --git a/matrix_powers_mv/CMakeLists.txt b/matrix_powers_mv/CMakeLists.txt
index a1d324b..3a98e3a 100644
--- a/matrix_powers_mv/CMakeLists.txt
+++ b/matrix_powers_mv/CMakeLists.txt
@@ -1,3 +1,7 @@
 add_catch(test_matrix_powers_mv test.cpp)
 target_include_directories(test_matrix_powers_mv PRIVATE ${BASIC_LA_PATH})
-target_link_libraries(test_matrix_powers_mv PRIVATE BASIC_LA)
\ No newline at end of file
+target_link_libraries(test_matrix_powers_mv PRIVATE BASIC_LA)
+
+add_catch(bench_matrix_powers_mv run.cpp)
+target_include_directories(bench_matrix_powers_mv PRIVATE ${BASIC_LA_PATH})
+target_link_libraries(bench_matrix_powers_mv PRIVATE BASIC_LA)
\ No newline at end of file
diff --git a/matrix_powers_mv/run.cpp b/matrix_powers_mv/run.cpp
new file mode 100644
index 0000000..d26c772
--- /dev/null
+++ b/matrix_powers_mv/run.cpp
@@ -0,0 +1,93 @@
+#include <filesystem>
+#include <fstream>
+#include <sstream>
+#include <matrix_powers_mv.h>
+
+#include <catch2/catch_test_macros.hpp>
+#include <catch2/benchmark/catch_benchmark.hpp>
+
+void PrepareMatrix(SparseMatrix<double>& a, const size_t n) {
+    std::stringstream file_name;
+    file_name << std::filesystem::current_path().string() << "/../" << "matrix_examples/sparse_spd/" << n;
+    std::ifstream fstream;
+    fstream.open(file_name.str());
+    REQUIRE(fstream.is_open());
+    fstream >> a;
+    fstream.close();
+    REQUIRE(a.data_);
+}
+
+TEST_CASE("Benchmark CG") {
+    {
+        constexpr int64_t n = 1024;
+        constexpr int64_t s = 512;
+        SparseMatrix<double> a_sparse;
+        PrepareMatrix(a_sparse, n);
+        std::vector<Vector<double>> result(s);
+        for (auto& single_res : result) {
+            single_res = Vector<double>(n);
+        }
+        Vector<double> b;
+        NHelpers::GenRandomVector(b, n, true);
+        BENCHMARK("MatrixPowersMV with s=512, size=1024:") {
+            MatrixPowersMV(a_sparse, b, result);
+        };
+
+        BENCHMARK("Sequential MatVec's with s=512, size=1024:") {
+            Vector<double> cur_vector = b;
+            for (int64_t i = 1; i < s; ++i) {
+                Vector<double> cur_result(n);
+                a_sparse.VecMult(cur_vector, cur_result);
+                cur_vector = std::move(cur_result);
+            }
+        };
+    }
+    {
+        constexpr int64_t n = 8192;
+        constexpr int64_t s = 4096;
+        SparseMatrix<double> a_sparse;
+        PrepareMatrix(a_sparse, n);
+        std::vector<Vector<double>> result(s);
+        for (auto& single_res : result) {
+            single_res = Vector<double>(n);
+        }
+        Vector<double> b;
+        NHelpers::GenRandomVector(b, n, true);
+        BENCHMARK("MatrixPowersMV with s=4096, size=8192:") {
+            MatrixPowersMV(a_sparse, b, result);
+        };
+
+        BENCHMARK("Sequential MatVec's with s=4096, size=8192:") {
+            Vector<double> cur_vector = b;
+            for (int64_t i = 1; i < s; ++i) {
+                Vector<double> cur_result(n);
+                a_sparse.VecMult(cur_vector, cur_result);
+                cur_vector = std::move(cur_result);
+            }
+        };
+    }
+    {
+        constexpr int64_t n = 16384;
+        constexpr int64_t s = 8192;
+        SparseMatrix<double> a_sparse;
+        PrepareMatrix(a_sparse, n);
+        std::vector<Vector<double>> result(s);
+        for (auto& single_res : result) {
+            single_res = Vector<double>(n);
+        }
+        Vector<double> b;
+        NHelpers::GenRandomVector(b, n, true);
+        BENCHMARK("MatrixPowersMV with s=8192, size=16384:") {
+            MatrixPowersMV(a_sparse, b, result);
+        };
+
+        BENCHMARK("Sequential MatVec's with s=8192, size=16384:") {
+            Vector<double> cur_vector = b;
+            for (int64_t i = 1; i < s; ++i) {
+                Vector<double> cur_result(n);
+                a_sparse.VecMult(cur_vector, cur_result);
+                cur_vector = std::move(cur_result);
+            }
+        };
+    }
+}
diff --git a/matrix_powers_mv/test.cpp b/matrix_powers_mv/test.cpp
index 2c2d5ec..299f7f6 100644
--- a/matrix_powers_mv/test.cpp
+++ b/matrix_powers_mv/test.cpp
@@ -58,4 +58,12 @@ TEST_CASE("Size 256") {
 
 TEST_CASE("Size 512") {
     Test(512);
+}
+
+TEST_CASE("Size 1024") {
+    Test(1024);
+}
+
+TEST_CASE("Size 2048") {
+    Test(2048);
 }
\ No newline at end of file