diff --git a/femutils/CsrFormatMatrix.cc b/femutils/CsrFormatMatrix.cc
index 33c793a..70abfbc 100644
--- a/femutils/CsrFormatMatrix.cc
+++ b/femutils/CsrFormatMatrix.cc
@@ -65,14 +65,14 @@ translateToLinearSystem(DoFLinearSystem& linear_system)
     if (((i + 1) < nb_row) && (m_matrix_row(i) == m_matrix_row(i + 1)))
       continue;
     for (Int32 j = m_matrix_row(i); ((i + 1) < nb_row && j < m_matrix_row(i + 1)) || ((i + 1) == nb_row && j < m_matrix_column.dim1Size()); j++) {
-      if (DoFLocalId(m_matrix_column(j)).isNull())
-        continue;
-      //info() << "Add: (" << i << ", " << m_matrix_column(j) << " v=" << m_matrix_value(j);
       if (do_set_csr){
         ++m_matrix_rows_nb_column[i];
+        continue;
       }
-      else
-        linear_system.matrixAddValue(DoFLocalId(i), DoFLocalId(m_matrix_column(j)), m_matrix_value(j));
+      if (DoFLocalId(m_matrix_column(j)).isNull())
+        continue;
+      //info() << "Add: (" << i << ", " << m_matrix_column(j) << " v=" << m_matrix_value(j);
+      linear_system.matrixAddValue(DoFLocalId(i), DoFLocalId(m_matrix_column(j)), m_matrix_value(j));
     }
   }
diff --git a/femutils/HypreDoFLinearSystem.cc b/femutils/HypreDoFLinearSystem.cc
index dee2c74..04f2ec7 100644
--- a/femutils/HypreDoFLinearSystem.cc
+++ b/femutils/HypreDoFLinearSystem.cc
@@ -16,11 +16,14 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
 #include
+#include
+#include
 #include
@@ -33,6 +36,10 @@
 #include
 #include
 
+// NOTE:
+// The DoF family must be compacted (i.e. maxLocalId()==nbItem()) and sorted
+// for this implementation to work.
+
 /*---------------------------------------------------------------------------*/
 /*---------------------------------------------------------------------------*/
@@ -86,18 +93,22 @@ class HypreDoFLinearSystemImpl
   , m_dof_matrix_indexes(VariableBuildInfo(m_dof_family, solver_name + "DoFMatrixIndexes"))
   , m_dof_elimination_info(VariableBuildInfo(m_dof_family, solver_name + "DoFEliminationInfo"))
   , m_dof_elimination_value(VariableBuildInfo(m_dof_family, solver_name + "DoFEliminationValue"))
+  , m_dof_matrix_numbering(VariableBuildInfo(dof_family, solver_name + "MatrixNumbering"))
   {
     info() << "Creating HypreDoFLinearSystemImpl()";
   }
 
   ~HypreDoFLinearSystemImpl()
   {
+    info() << "Calling HYPRE_Finalize";
+    HYPRE_Finalize(); /* must be the last HYPRE function call */
   }
 
  public:
 
   void build()
   {
+    HYPRE_Init(); /* must be the first HYPRE function call */
   }
 
  public:
@@ -161,27 +172,95 @@ class HypreDoFLinearSystemImpl
   VariableDoFInt32 m_dof_matrix_indexes;
   VariableDoFByte m_dof_elimination_info;
   VariableDoFReal m_dof_elimination_value;
+  VariableDoFInt32 m_dof_matrix_numbering;
+  NumArray m_parallel_columns_index;
+  NumArray m_parallel_rows_index;
+  //! Work array to store values of the solution vector in parallel
+  NumArray m_result_work_values;
 
   Runner* m_runner = nullptr;
   CSRFormatView m_csr_view;
 
+  Int32 m_first_own_row = -1;
+  Int32 m_nb_own_row = -1;
+
+ private:
+
+  void _computeMatrixNumerotation();
 };
 
 /*---------------------------------------------------------------------------*/
 /*---------------------------------------------------------------------------*/
 
 void HypreDoFLinearSystemImpl::
-solve()
+_computeMatrixNumerotation()
 {
-  HYPRE_Init(); /* must be the first HYPRE function call */
+  IParallelMng* pm = m_dof_family->parallelMng();
+  const bool is_parallel = pm->isParallel();
+  const Int32 nb_rank = pm->commSize();
+  const Int32 my_rank = pm->commRank();
+
+  DoFGroup all_dofs = m_dof_family->allItems();
+  DoFGroup own_dofs = all_dofs.own();
+  const Int32 nb_own_row = own_dofs.size();
+
+  Int32 own_first_index = 0;
+
+  if (is_parallel) {
+    // TODO: Use a Scan once it is available in Arcane
+    UniqueArray parallel_rows_index(nb_rank, 0);
+    pm->allGather(ConstArrayView(1, &nb_own_row), parallel_rows_index);
+    info() << "ALL_NB_ROW = " << parallel_rows_index;
+    for (Int32 i = 0; i < my_rank; ++i)
+      own_first_index += parallel_rows_index[i];
+  }
+
+  info() << "OwnFirstIndex=" << own_first_index << " NbOwnRow=" << nb_own_row;
+
+  m_first_own_row = own_first_index;
+  m_nb_own_row = nb_own_row;
+
+  // TODO: Do this with the accelerator API
+  ENUMERATE_DOF (idof, own_dofs) {
+    DoF dof = *idof;
+    m_dof_matrix_numbering[idof] = own_first_index + idof.index();
+    //info() << "Numbering dof_uid=" << dof.uniqueId() << " M=" << m_dof_matrix_numbering[idof];
+  }
+  info() << " nb_own_row=" << nb_own_row << " nb_item=" << m_dof_family->nbItem();
+  m_dof_matrix_numbering.synchronize();
+  m_parallel_rows_index.resize(nb_own_row);
+  m_result_work_values.resize(nb_own_row);
+}
+
+/*---------------------------------------------------------------------------*/
+/*---------------------------------------------------------------------------*/
+
+void HypreDoFLinearSystemImpl::
+solve()
+{
   HYPRE_MemoryLocation hypre_memory = HYPRE_MEMORY_HOST;
   HYPRE_ExecutionPolicy hypre_exec_policy = HYPRE_EXEC_HOST;
 
+  // Get the associated MPI communicator
+  IParallelMng* pm = m_dof_family->parallelMng();
+  Parallel::Communicator arcane_comm = pm->communicator();
+  MPI_Comm mpi_comm = MPI_COMM_WORLD;
+  if (arcane_comm.isValid())
+    mpi_comm = static_cast<MPI_Comm>(arcane_comm);
+
+  bool is_parallel = pm->isParallel();
+  const Int32 nb_rank = pm->commSize();
+  const Int32 my_rank = pm->commRank();
+
+  // TODO: Do this only once, unless the DoFs change
+  _computeMatrixNumerotation();
+
   bool is_use_device = false;
   if (m_runner) {
     is_use_device = isAcceleratorPolicy(m_runner->executionPolicy());
     info() << "Runner for Hypre=" << m_runner->executionPolicy() << " is_device=" << is_use_device;
   }
 
+  // If HYPRE is not compiled with GPU support, then use the host.
   // (NOTE: a priori this is not needed. If Hypre is not compiled with
   // GPU support then HYPRE_MEMORY_DEVICE <=> HYPRE_MEMORY_HOST
@@ -215,28 +294,29 @@ solve()
   //HYPRE_SetGPUMemoryPoolSize(bin_growth, min_bin, max_bin, max_bytes);
 
   /* setup IJ matrix A */
-  // TODO: Use the correct communicator in parallel.
-  MPI_Comm comm = MPI_COMM_WORLD;
   HYPRE_IJMatrix ij_A = nullptr;
   HYPRE_ParCSRMatrix parcsr_A = nullptr;
-  const int first_row = 0;
-  const int nb_row = m_csr_view.rows().size();
-  info() << "NB_ROW=" << nb_row;
-  const int last_row = first_row + nb_row - 1;
-  {
-    int first_col = first_row;
-    int last_col = first_col + nb_row - 1;
-    info() << "CreateMatrix row=" << first_row << ", " << last_row
-           << " col=" << first_col << ", " << last_col;
-    HYPRE_IJMatrixCreate(comm, first_row, last_row, first_col, last_col, &ij_A);
+  const bool do_debug_print = false;
+  const bool do_dump_matrix = false;
+
+  Span rows_index_span = m_dof_matrix_numbering.asArray();
+  const Int32 nb_local_row = rows_index_span.size();
+
+  if (do_debug_print) {
+    info() << "ROWS_INDEX=" << rows_index_span;
+    info() << "ROWS=" << m_csr_view.rows();
+    info() << "ROWS_NB_COLUMNS=" << m_csr_view.rowsNbColumn();
+    info() << "COLUMNS=" << m_csr_view.columns();
+    info() << "VALUE=" << m_csr_view.values();
   }
-  NumArray rows_index(nb_row);
-  for (Int32 i = 0; i < nb_row; ++i)
-    rows_index[i] = i;
-  Span rows_index_span = rows_index.to1DSpan();
+  const int first_row = m_first_own_row;
+  const int last_row = m_first_own_row + m_nb_own_row - 1;
+
+  info() << "CreateMatrix first_row=" << first_row << " last_row " << last_row;
+  HYPRE_IJMatrixCreate(mpi_comm, first_row, last_row, first_row, last_row, &ij_A);
 
   int* rows_nb_column_data = const_cast<int*>(m_csr_view.rowsNbColumn().data());
@@ -244,41 +324,108 @@ solve()
   HYPRE_IJMatrixSetObjectType(ij_A, HYPRE_PARCSR);
   HYPRE_IJMatrixInitialize_v2(ij_A, hypre_memory);
 
+  // m_csr_view.columns() uses matrix coordinates local to the sub-domain.
+  // We need to translate them to global matrix coordinates.
+  Span columns_index_span = m_csr_view.columns();
+  if (is_parallel) {
+    // TODO: Do this on the accelerator, and only once if the structure
+    // does not change.
+    Int64 nb_column = columns_index_span.size();
+    m_parallel_columns_index.resize(nb_column);
+    for (Int64 i = 0; i < nb_column; ++i) {
+      DoFLocalId lid(columns_index_span[i]);
+      //info() << "I=" << i << " index=" << columns_index_span[i];
+      // If lid corresponds to a null item, then the matrix value
+      // will not be used.
+      if (!lid.isNull())
+        m_parallel_columns_index[i] = m_dof_matrix_numbering[lid];
+      else
+        m_parallel_columns_index[i] = 0;
+    }
+    columns_index_span = m_parallel_columns_index.to1DSpan();
+  }
+
+  if (do_debug_print) {
+    info() << "FINAL_COLUMNS=" << columns_index_span;
+    info() << "NbValue=" << m_csr_view.values().size();
+  }
+
+  Span matrix_values = m_csr_view.values();
+  if (do_debug_print) {
+    ENUMERATE_ (DoF, idof, m_dof_family->allItems()) {
+      DoF dof = *idof;
+      Int32 nb_col = m_csr_view.rowsNbColumn()[idof.index()];
+      Int32 row_csr_index = m_csr_view.rows()[idof.index()];
+      info() << "DoF dof=" << ItemPrinter(dof) << " nb_col=" << nb_col << " row_csr_index=" << row_csr_index
+             << " global_row=" << rows_index_span[idof.index()];
+      for (Int32 i = 0; i < nb_col; ++i) {
+        Int32 col_index = m_csr_view.columns()[row_csr_index + i];
+        if (col_index >= 0)
+          info() << "COL=" << col_index
+                 << " T_COL=" << m_dof_matrix_numbering[DoFLocalId(col_index)]
+                 << " V=" << matrix_values[row_csr_index + i];
+        else
+          info() << "COL=" << col_index
+                 << " X_COL=" << columns_index_span[row_csr_index + i]
+                 << " V=" << matrix_values[row_csr_index + i];
+      }
+    }
+  }
+
+  if (is_parallel) {
+    // Fill 'm_parallel_rows_index' with only the rows we own.
+    // NOTE: This is only needed if the matrix structure has changed.
+    Int32 index = 0;
+    ENUMERATE_ (DoF, idof, m_dof_family->allItems()) {
+      DoF dof = *idof;
+      if (!dof.isOwn())
+        continue;
+      Int32 nb_col = m_csr_view.rowsNbColumn()[idof.index()];
+      m_parallel_rows_index[index] = rows_index_span[idof.index()];
+      ++index;
+    }
+  }
+
   /* GPU pointers; efficient in large chunks */
   HYPRE_IJMatrixSetValues(ij_A,
-                          nb_row,
+                          nb_local_row,
                           rows_nb_column_data,
                           rows_index_span.data(),
-                          m_csr_view.columns().data(),
-                          m_csr_view.values().data());
+                          columns_index_span.data(),
+                          matrix_values.data());
 
   HYPRE_IJMatrixAssemble(ij_A);
   HYPRE_IJMatrixGetObject(ij_A, (void**)&parcsr_A);
 
   Real m2 = platform::getRealTime();
   info() << "Time to create matrix=" << (m2 - m1);
 
-  //HYPRE_IJMatrixPrint(ij_A, "dumpA.txt");
+  if (do_dump_matrix) {
+    String file_name = String("dumpA.") + String::fromNumber(my_rank) + ".txt";
+    HYPRE_IJMatrixPrint(ij_A, file_name.localstr());
+    pm->traceMng()->flush();
+    pm->barrier();
+  }
 
   HYPRE_IJVector ij_vector_b = nullptr;
   HYPRE_ParVector parvector_b = nullptr;
   HYPRE_IJVector ij_vector_x = nullptr;
   HYPRE_ParVector parvector_x = nullptr;
 
-  hypreCheck("IJVectorCreate", HYPRE_IJVectorCreate(comm, first_row, last_row, &ij_vector_b));
+  hypreCheck("IJVectorCreate", HYPRE_IJVectorCreate(mpi_comm, first_row, last_row, &ij_vector_b));
   hypreCheck("IJVectorSetObjectType", HYPRE_IJVectorSetObjectType(ij_vector_b, HYPRE_PARCSR));
   HYPRE_IJVectorInitialize_v2(ij_vector_b, hypre_memory);
 
-  hypreCheck("IJVectorCreate", HYPRE_IJVectorCreate(comm, first_row, last_row, &ij_vector_x));
+  hypreCheck("IJVectorCreate", HYPRE_IJVectorCreate(mpi_comm, first_row, last_row, &ij_vector_x));
   hypreCheck("IJVectorSetObjectType", HYPRE_IJVectorSetObjectType(ij_vector_x, HYPRE_PARCSR));
   HYPRE_IJVectorInitialize_v2(ij_vector_x, hypre_memory);
 
   Real v1 = platform::getRealTime();
   hypreCheck("HYPRE_IJVectorSetValues",
-             HYPRE_IJVectorSetValues(ij_vector_b, nb_row, rows_index_span.data(),
+             HYPRE_IJVectorSetValues(ij_vector_b, nb_local_row, rows_index_span.data(),
                                      m_rhs_variable.asArray().data()));
 
   hypreCheck("HYPRE_IJVectorSetValues",
-             HYPRE_IJVectorSetValues(ij_vector_x, nb_row, rows_index_span.data(),
+             HYPRE_IJVectorSetValues(ij_vector_x, nb_local_row, rows_index_span.data(),
                                      m_dof_variable.asArray().data()));
 
   hypreCheck("HYPRE_IJVectorAssemble",
@@ -291,13 +438,19 @@ solve()
   Real v2 = platform::getRealTime();
   info() << "Time to create vectors=" << (v2 - v1);
 
-  //HYPRE_IJVectorPrint(ij_vector_b, "dumpB.txt");
-  //HYPRE_IJVectorPrint(ij_vector_x, "dumpX.txt");
+  if (do_dump_matrix) {
+    String file_name_b = String("dumpB.") + String::fromNumber(my_rank) + ".txt";
+    HYPRE_IJVectorPrint(ij_vector_b, file_name_b.localstr());
+    String file_name_x = String("dumpX.") + String::fromNumber(my_rank) + ".txt";
+    HYPRE_IJVectorPrint(ij_vector_x, file_name_x.localstr());
+    pm->traceMng()->flush();
+    pm->barrier();
+  }
 
   HYPRE_Solver solver = nullptr;
   HYPRE_Solver precond = nullptr;
 
   /* setup AMG */
-  HYPRE_ParCSRPCGCreate(comm, &solver);
+  HYPRE_ParCSRPCGCreate(mpi_comm, &solver);
 
   /* Set some parameters (See Reference Manual for more parameters) */
   HYPRE_PCGSetMaxIter(solver, 1000); /* max iterations */
@@ -327,11 +480,21 @@ solve()
   Real b1 = platform::getRealTime();
   info() << "Time to solve=" << (b1 - a1);
 
-  hypreCheck("HYPRE_IJVectorGetValues",
-             HYPRE_IJVectorGetValues(ij_vector_x, nb_row, rows_index_span.data(),
-                                     m_dof_variable.asArray().data()));
-
-  HYPRE_Finalize(); /* must be the last HYPRE function call */
+  if (is_parallel) {
+    Int32 nb_wanted_row = m_parallel_rows_index.extent0();
+    hypreCheck("HYPRE_IJVectorGetValues",
+               HYPRE_IJVectorGetValues(ij_vector_x, nb_wanted_row,
+                                       m_parallel_rows_index.to1DSpan().data(),
+                                       m_result_work_values.to1DSpan().data()));
+    ENUMERATE_ (DoF, idof, m_dof_family->allItems().own()) {
+      m_dof_variable[idof] = m_result_work_values[idof.index()];
+    }
+  }
+  else {
+    hypreCheck("HYPRE_IJVectorGetValues",
+               HYPRE_IJVectorGetValues(ij_vector_x, nb_local_row, rows_index_span.data(),
+                                       m_dof_variable.asArray().data()));
+  }
 }
 
 /*---------------------------------------------------------------------------*/
diff --git a/poisson/CMakeLists.txt b/poisson/CMakeLists.txt
index b9db70d..cf11b4d 100644
--- a/poisson/CMakeLists.txt
+++ b/poisson/CMakeLists.txt
@@ -107,6 +107,10 @@ endif()
 if(FEMUTILS_HAS_SOLVER_BACKEND_HYPRE)
   add_test(NAME [poisson]poisson_hypre COMMAND Poisson Test.poisson.hypre.arc)
   add_test(NAME [poisson]poisson_hypre_direct COMMAND Poisson Test.poisson.hypre_direct.arc)
+  if(FEMUTILS_HAS_PARALLEL_SOLVER AND MPIEXEC_EXECUTABLE)
+    add_test(NAME [poisson]poisson_hypre_direct_2pe COMMAND ${MPIEXEC_EXECUTABLE} -n 2 ./Poisson Test.poisson.hypre_direct.arc)
+    add_test(NAME [poisson]poisson_hypre_direct_4pe COMMAND ${MPIEXEC_EXECUTABLE} -n 4 ./Poisson Test.poisson.hypre_direct.arc)
+  endif()
 endif()
 if(FEMUTILS_HAS_SOLVER_BACKEND_PETSC)
   add_test(NAME [poisson]poisson_petsc COMMAND Poisson Test.poisson.petsc.arc)
@@ -117,6 +121,9 @@ if(ARCANE_HAS_ACCELERATOR)
   add_test(NAME [poisson]poisson_gpu COMMAND ./Poisson -A,AcceleratorRuntime=cuda Test.poisson.petsc.arc)
   if(FEMUTILS_HAS_SOLVER_BACKEND_HYPRE)
     add_test(NAME [poisson]poisson_hypre_direct_gpu COMMAND ./Poisson -A,AcceleratorRuntime=cuda Test.poisson.hypre_direct.arc)
+    if(FEMUTILS_HAS_PARALLEL_SOLVER AND MPIEXEC_EXECUTABLE)
+      add_test(NAME [poisson]poisson_hypre_direct_gpu_4pe COMMAND ${MPIEXEC_EXECUTABLE} -n 4 ./Poisson -A,AcceleratorRuntime=cuda Test.poisson.hypre_direct.arc)
+    endif()
   endif()
 endif()
diff --git a/poisson/Test.poisson.hypre_direct.arc b/poisson/Test.poisson.hypre_direct.arc
index 2dd7aa5..05e1aac 100644
--- a/poisson/Test.poisson.hypre_direct.arc
+++ b/poisson/Test.poisson.hypre_direct.arc
@@ -6,10 +6,11 @@
- 1
-
- U
-
+ 1
+ false
+
+ U
+
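
For reference, the core of the new parallel support is the global row numbering computed in _computeMatrixNumerotation(): every rank owns a contiguous block of rows whose first global index is the sum of the row counts of the lower ranks, and local column ids are then translated through m_dof_matrix_numbering before being handed to HYPRE. The sketch below illustrates only that offset computation, using plain MPI instead of Arcane's IParallelMng; the helper name computeFirstOwnRow and the example row counts are illustrative, not part of the patch (the patch uses allGather plus a partial sum, with a TODO to switch to a scan).

#include <mpi.h>
#include <iostream>
#include <numeric>
#include <vector>

// Illustrative only (not from the patch): first global row owned by this rank,
// given the number of rows it owns locally. This mirrors the allGather +
// partial-sum loop of _computeMatrixNumerotation(); an exclusive prefix sum
// (MPI_Exscan) would give the same result in one call.
int computeFirstOwnRow(MPI_Comm comm, int nb_own_row)
{
  int nb_rank = 0;
  int my_rank = 0;
  MPI_Comm_size(comm, &nb_rank);
  MPI_Comm_rank(comm, &my_rank);

  // Gather the number of owned rows of every rank.
  std::vector<int> all_nb_row(nb_rank, 0);
  MPI_Allgather(&nb_own_row, 1, MPI_INT, all_nb_row.data(), 1, MPI_INT, comm);

  // Sum the counts of the ranks below us.
  return std::accumulate(all_nb_row.begin(), all_nb_row.begin() + my_rank, 0);
}

int main(int argc, char** argv)
{
  MPI_Init(&argc, &argv);
  int my_rank = 0;
  MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

  // Hypothetical example: rank r owns (r + 2) rows.
  int nb_own_row = my_rank + 2;
  int first_own_row = computeFirstOwnRow(MPI_COMM_WORLD, nb_own_row);

  // These two bounds play the role of the [first_row, last_row] ownership
  // range passed to HYPRE_IJMatrixCreate and HYPRE_IJVectorCreate in the patch.
  std::cout << "rank " << my_rank << " owns global rows [" << first_own_row
            << ", " << first_own_row + nb_own_row - 1 << "]\n";

  MPI_Finalize();
  return 0;
}

With this per-rank offset, locally numbered row i maps to global row first_own_row + i, which is what the patch stores in m_dof_matrix_numbering and then synchronizes so that ghost DoFs also carry their owner's global index for the column translation.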