diff --git a/plugins/hypre/bench/bench.cpp b/plugins/hypre/bench/bench.cpp index 10a77bceb..7f7a462b9 100644 --- a/plugins/hypre/bench/bench.cpp +++ b/plugins/hypre/bench/bench.cpp @@ -42,7 +42,7 @@ int test(const Alien::Hypre::OptionTypes::eSolver& solv, const Alien::Hypre::Opt for (int i = 0; i < NB_RUNS; i++) { Alien::Hypre::Options options; - options.numIterationsMax(500); + options.numIterationsMax(10000); options.stopCriteriaValue(1e-8); options.preconditioner(prec); // Jacobi, NoPC options.solver(solv); //CG, GMRES, BICG, BICGSTAB diff --git a/plugins/hypre/src/hypre_linear_solver.cpp b/plugins/hypre/src/hypre_linear_solver.cpp index 822980672..2421bb09f 100644 --- a/plugins/hypre/src/hypre_linear_solver.cpp +++ b/plugins/hypre/src/hypre_linear_solver.cpp @@ -90,18 +90,26 @@ bool InternalLinearSolver::solve(const Matrix& A, const Vector& b, Vector& x) precond_setup_function = HYPRE_BoomerAMGSetup; precond_destroy_function = HYPRE_BoomerAMGDestroy; -#ifdef ALIEN_HYPRE_DEVICE + // Important, set these parameters for running BoomerAMG as a preconditioner + HYPRE_BoomerAMGSetMaxIter(preconditioner, 1); + HYPRE_BoomerAMGSetTol(preconditioner, 0.0); + + HYPRE_BoomerAMGSetStrongThreshold(preconditioner, 0.5); // Better for 3d ? + + // HYPRE_BoomerAMGSetPrintLevel(preconditioner, 1); /* print amg solution info */ + + //#ifdef ALIEN_HYPRE_DEVICE // GPU only support a subset of paramater values. // see https://hypre.readthedocs.io/en/latest/solvers-boomeramg.html#gpu-supported-options - HYPRE_BoomerAMGSetRelaxType(preconditioner, 3); /* 3, 4, 6, 7, 18, 11, 12 */ + HYPRE_BoomerAMGSetRelaxType(preconditioner, 18); /* 3, 4, 6, 7, 18, 11, 12 */ HYPRE_BoomerAMGSetRelaxOrder(preconditioner, false); /* must be false */ HYPRE_BoomerAMGSetCoarsenType(preconditioner, 8); /* 8 */ - // FIXME: understand why 3 and 15 do not work with unit tests on CPUs. - HYPRE_BoomerAMGSetInterpType(preconditioner, 14); /* 3, 15, 6, 14, 18 */ - HYPRE_BoomerAMGSetAggInterpType(preconditioner, 5); /* 5 or 7 */ + HYPRE_BoomerAMGSetInterpType(preconditioner, 18); /* 3, 15, 6, 14, 18 */ + HYPRE_BoomerAMGSetAggInterpType(preconditioner, 7); /* 5 or 7 */ + HYPRE_BoomerAMGSetAggNumLevels(preconditioner, 5); HYPRE_BoomerAMGSetKeepTranspose(preconditioner, true); /* keep transpose to avoid SpMTV */ HYPRE_BoomerAMGSetRAP2(preconditioner, false); /* RAP in two multiplications (default: FALSE) */ -#endif // ALIEN_HYPRE_DEVICE + //#endif // ALIEN_HYPRE_DEVICE break; case OptionTypes::ParaSailsPC: precond_name = "parasails"; @@ -271,18 +279,18 @@ bool InternalLinearSolver::solve(const Matrix& A, const Vector& b, Vector& x) error = (*solver_solve_function)(solver, par_a, par_rhs, par_x); m_status.succeeded = (error == 0); - if (m_status.succeeded) { - checkError("Hypre " + solver_name + " solver GetNumIterations", - (*solver_get_num_iterations_function)(solver, &m_status.iteration_count)); - checkError("Hypre " + solver_name + " solver GetFinalResidual", - (*solver_get_final_relative_residual_function)(solver, &m_status.residual)); - - m_status.succeeded = (m_status.iteration_count < max_it) || (m_status.succeeded == max_it && m_status.residual <= rtol); - } - else { - // Solver is not converged. Clear Hypre errors for subsequent calls. - HYPRE_ClearAllErrors(); - } + //if (m_status.succeeded) { + checkError("Hypre " + solver_name + " solver GetNumIterations", + (*solver_get_num_iterations_function)(solver, &m_status.iteration_count)); + checkError("Hypre " + solver_name + " solver GetFinalResidual", + (*solver_get_final_relative_residual_function)(solver, &m_status.residual)); + + m_status.succeeded = (m_status.iteration_count < max_it) || (m_status.succeeded == max_it && m_status.residual <= rtol); + // } + // else { + // // Solver is not converged. Clear Hypre errors for subsequent calls. + // HYPRE_ClearAllErrors(); + // } checkError( "Hypre " + solver_name + " solver Destroy", (*solver_destroy_function)(solver)); diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 3380cbe8d..9e05664a4 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -22,10 +22,10 @@ add_subdirectory(alien/kernels/redistributor) add_subdirectory(alien/kernels/simple_csr) add_subdirectory(alien/index_manager) -IF(ALIEN_USE_SYCL) - find_package(hipSYCL CONFIG REQUIRED) - add_subdirectory(alien/kernels/sycl) -endif() +IF (ALIEN_USE_SYCL) + find_package(hipSYCL CONFIG REQUIRED) + add_subdirectory(alien/kernels/sycl) +endif () add_library(alien_core @@ -120,44 +120,47 @@ add_library(alien_core target_link_libraries(alien_core PUBLIC alien_utils alien_data) target_link_libraries(alien_core PUBLIC - alien_kernel_dok - alien_kernel_composite - alien_kernel_redistributor - alien_kernel_simplecsr) - + alien_kernel_dok + alien_kernel_composite + alien_kernel_redistributor + alien_kernel_simplecsr) target_link_libraries(alien_core PUBLIC alien_index_manager) set_property(TARGET alien_core PROPERTY CXX_STANDARD 17) +# For parallel STL +find_package(TBB REQUIRED) +target_link_libraries(alien_core PRIVATE TBB::tbb) + message(STATUS "Using AVX Simd instructions ? -> ${ALIEN_WANT_AVX}") message(STATUS "Using AVX2 Simd instructions ? -> ${ALIEN_WANT_AVX2}") message(STATUS "Using AVX512 Simd instructions ? -> ${ALIEN_WANT_AVX512}") -if(CMAKE_COMPILER_IS_GNUCXX OR (CMAKE_CXX_COMPILER_ID STREQUAL Clang)) - if(ALIEN_WANT_AVX) - target_compile_options(alien_core INTERFACE -mavx) - endif() - if(ALIEN_WANT_AVX2) - target_compile_options(alien_core INTERFACE -mavx -mfma) - endif() - if(ALIEN_WANT_AVX512) - target_compile_options(alien_core INTERFACE -mavx512f -mavx512cd) - endif() -endif() - -IF(ALIEN_USE_SYCL) - target_link_libraries(alien_core PUBLIC alien_kernel_sycl) - target_compile_options(alien_core PRIVATE "--gcc-toolchain=${GCCCORE_ROOT}") - add_sycl_to_target(TARGET alien_core) -ENDIF() +if (CMAKE_COMPILER_IS_GNUCXX OR (CMAKE_CXX_COMPILER_ID STREQUAL Clang)) + if (ALIEN_WANT_AVX) + target_compile_options(alien_core INTERFACE -mavx) + endif () + if (ALIEN_WANT_AVX2) + target_compile_options(alien_core INTERFACE -mavx -mfma) + endif () + if (ALIEN_WANT_AVX512) + target_compile_options(alien_core INTERFACE -mavx512f -mavx512cd) + endif () +endif () + +IF (ALIEN_USE_SYCL) + target_link_libraries(alien_core PUBLIC alien_kernel_sycl) + target_compile_options(alien_core PRIVATE "--gcc-toolchain=${GCCCORE_ROOT}") + add_sycl_to_target(TARGET alien_core) +ENDIF () target_link_libraries(alien_core PUBLIC - Arccore::arccore_trace - Arccore::arccore_collections - Arccore::arccore_base - Arccore::arccore_message_passing_mpi) + Arccore::arccore_trace + Arccore::arccore_collections + Arccore::arccore_base + Arccore::arccore_message_passing_mpi) find_package(Boost COMPONENTS program_options REQUIRED) diff --git a/src/core/alien/kernels/dok/DoKDistributor.cc b/src/core/alien/kernels/dok/DoKDistributor.cc index c75f5f5d6..3a068d66f 100644 --- a/src/core/alien/kernels/dok/DoKDistributor.cc +++ b/src/core/alien/kernels/dok/DoKDistributor.cc @@ -19,6 +19,7 @@ #include "DoKDistributor.h" #include +#include #include "DoKBackEnd.h" #include "DoKMatrixT.h" @@ -30,7 +31,7 @@ namespace Alien { DoKDistributor::DoKDistributor(const RedistributorCommPlan* commPlan) -: m_distributor(new DoKDistributorComm(commPlan)) +: m_distributor(std::make_unique(commPlan)) {} void DoKDistributor::distribute(const DoKMatrix& src, DoKMatrix& dst) @@ -41,7 +42,7 @@ void DoKDistributor::distribute(const DoKMatrix& src, DoKMatrix& dst) void DoKDistributor::distribute(const DoKVector& src, DoKVector& dst) { std::vector> pair_values(src.m_data.begin(), src.m_data.end()); - std::sort(pair_values.begin(), pair_values.end()); + std::sort(std::execution::par_unseq, pair_values.begin(), pair_values.end()); Arccore::UniqueArray snd_keys(src.m_data.size()); Arccore::UniqueArray snd_values(src.m_data.size()); diff --git a/src/core/alien/kernels/dok/DoKLocalMatrixIndexer.cc b/src/core/alien/kernels/dok/DoKLocalMatrixIndexer.cc index 66315afbc..fdb77561d 100644 --- a/src/core/alien/kernels/dok/DoKLocalMatrixIndexer.cc +++ b/src/core/alien/kernels/dok/DoKLocalMatrixIndexer.cc @@ -28,6 +28,7 @@ #include #include #include +#include #include "DoKReverseIndexer.h" @@ -75,8 +76,8 @@ namespace class KeyCompare { public: - typedef typename Map::iterator Iterator; - typedef typename Map::key_type Key; + using Iterator = typename Map::iterator; + using Key = typename Map::key_type; public: bool operator()(const Iterator& a, const Iterator& b) @@ -99,7 +100,7 @@ DoKLocalMatrixIndexer::sort(Arccore::Array& } KeyCompare compare; - std::sort(src.begin(), src.end(), compare); + std::sort(std::execution::par_unseq, src.begin(), src.end(), compare); auto* indexer = new DoKReverseIndexer(); auto size = static_cast(m_data.size()); diff --git a/src/movesemantic/alien/move/impl/MatrixMarketReader.cc b/src/movesemantic/alien/move/impl/MatrixMarketReader.cc index 457dfc7f7..34f3ea0e1 100644 --- a/src/movesemantic/alien/move/impl/MatrixMarketReader.cc +++ b/src/movesemantic/alien/move/impl/MatrixMarketReader.cc @@ -17,9 +17,12 @@ */ #include +#include #include #include +#include + #include #include @@ -48,35 +51,9 @@ namespace public: MatrixDescription() = default; - explicit MatrixDescription(Arccore::Span src) - { - if (src.size() != 4) { - throw Arccore::FatalErrorException("Matrix Descriptor", "Cannot deserialize array"); - } - n_rows = src[0]; - n_cols = src[1]; - n_nnz = src[2]; - symmetric = (src[3] == 0); - } - - Arccore::UniqueArray to_array() const - { - Arccore::UniqueArray array(4); - array[0] = n_rows; - array[1] = n_cols; - array[2] = n_nnz; - array[3] = symmetric ? 0 : 1; - return array; - } - - static constexpr size_t serializedSize() - { - return 4; - } - int n_rows{ 0 }; int n_cols{ 0 }; - int n_nnz{ 0 }; + size_t n_nnz{ 0 }; bool symmetric{ true }; }; @@ -146,15 +123,20 @@ namespace return std::make_optional(out); } - bool readValues(std::istream& fstream, DoKDirectMatrixBuilder& builder, bool symmetric) + bool readValues(std::istream& fstream, DoKDirectMatrixBuilder& builder, bool symmetric, size_t start, size_t stop) { - std::string line; - while (std::getline(fstream, line)) { + for (size_t i = 0; i < start; ++i) { + fstream.ignore(4096, '\n'); + } + std::string line; + size_t count = start; + while (count < stop && std::getline(fstream, line)) { if ('%' == line[0]) { continue; } + count++; int row = 0; int col = 0; double value = 0.0; @@ -183,32 +165,34 @@ namespace MatrixData ALIEN_MOVESEMANTIC_EXPORT readFromMatrixMarket(Arccore::MessagePassing::IMessagePassingMng* pm, const std::string& filename) { - if (pm->commRank() == 0) { // Only rank 0 read the file - std::ifstream stream; - std::array buf; // Buffer for reading - stream.rdbuf()->pubsetbuf(buf.data(), buf.size()); - stream.open(filename); - if (!stream) { - throw Arccore::FatalErrorException("readFromMatrixMarket", "Unable to read file"); - } - auto desc = readBanner(stream); - if (!desc) { - throw Arccore::FatalErrorException("readFromMatrixMarket", "Invalid header"); - } - auto ser_desc = desc.value().to_array(); - Arccore::MessagePassing::mpBroadcast(pm, ser_desc, 0); - DoKDirectMatrixBuilder builder(createMatrixData(desc.value(), pm)); - readValues(stream, builder, desc->symmetric); - return builder.release(); + std::ifstream stream; + std::array buf; // Buffer for reading + stream.rdbuf()->pubsetbuf(buf.data(), buf.size()); + stream.open(filename); + if (!stream) { + throw Arccore::FatalErrorException("readFromMatrixMarket", "Unable to matrix read file"); } - else { - // Receive description description from rank 0 - Arccore::UniqueArray ser_desc(MatrixDescription::serializedSize()); - Arccore::MessagePassing::mpBroadcast(pm, ser_desc, 0); - MatrixDescription desc(ser_desc); - DoKDirectMatrixBuilder builder(createMatrixData(desc, pm)); - return builder.release(); + auto desc = readBanner(stream); + if (!desc) { + throw Arccore::FatalErrorException("readFromMatrixMarket", "Invalid header"); } + DoKDirectMatrixBuilder builder(createMatrixData(desc.value(), pm)); + + size_t nnz_slice = desc.value().n_nnz / pm->commSize(); + + auto my_rank = pm->commRank(); + auto nnz_start = nnz_slice * my_rank; + auto nnz_stop = nnz_slice * (my_rank + 1); + if (my_rank == pm->commSize() - 1) { + nnz_stop = desc.value().n_nnz; + } + + std::cerr << "> " << pm->commRank() << " reading from " << nnz_start << " to " << nnz_stop << std::endl; + auto start = MPI_Wtime(); + readValues(stream, builder, desc->symmetric, nnz_start, nnz_stop); + auto stop = MPI_Wtime() - start; + std::cerr << "File reading took " << stop << std::endl; + return builder.release(); } VectorData ALIEN_MOVESEMANTIC_EXPORT @@ -217,71 +201,83 @@ readFromMatrixMarket(const VectorDistribution& distribution, const std::string& VectorData out(distribution); auto& v = out.impl()->template get(true); - if (distribution.parallelMng()->commRank() == 0) { // Only rank 0 read the file - std::ifstream stream; - std::array buf; // Buffer for reading - stream.rdbuf()->pubsetbuf(buf.data(), buf.size()); - stream.open(filename); - if (!stream) { - throw Arccore::FatalErrorException("readFromMatrixMarket", "Unable to read file"); - } - std::string line; - Arccore::Int32 row = 0; - - auto try_header = true; - while (std::getline(stream, line)) { - // get matrix kind - std::stringstream ss; - ss << line; + std::ifstream stream; + std::array buf; // Buffer for reading + stream.rdbuf()->pubsetbuf(buf.data(), buf.size()); + stream.open(filename); + if (!stream) { + throw Arccore::FatalErrorException("readFromMatrixMarket", "Unable to read vector file"); + } + std::string line; + size_t rows = 0; - if (try_header) { - std::string matrix; - std::string _; // junk - std::string format; // (coordinate, array) - std::string scalar; // (pattern, real, complex, integer) + auto try_header = true; + while (std::getline(stream, line)) { + // get matrix kind + std::stringstream ss; + ss << line; - ss >> matrix; // skip '%%MatrixMarket + if (try_header) { + std::string matrix; + std::string _; // junk + std::string format; // (coordinate, array) + std::string scalar; // (pattern, real, complex, integer) - if ("%%MatrixMarket" != matrix) - continue; + ss >> matrix; // skip '%%MatrixMarket - ss >> _; // skip matrix - ss >> format; - ss >> scalar; - ss >> _; + if ("%%MatrixMarket" != matrix) + continue; - tolower(format); - tolower(scalar); + ss >> _; // skip matrix + ss >> format; + ss >> scalar; + ss >> _; - if ("array" != format || "real" != scalar) { - throw Arccore::FatalErrorException("mtx vector must be in 'array' and 'real' formats."); - } - try_header = false; - } - if ('%' == line[0]) - continue; + tolower(format); + tolower(scalar); - int rows = 0; - int vectors = 0; - ss >> rows; - ss >> vectors; - if (vectors != 1) { - throw Arccore::FatalErrorException("mtx vector reader does not support multiple vectors."); - } - if (distribution.globalSize() != rows) { - throw Arccore::FatalErrorException("mtx vector is not of correct size"); + if ("array" != format || "real" != scalar) { + throw Arccore::FatalErrorException("mtx vector must be in 'array' and 'real' formats."); } - break; + try_header = false; } - while (std::getline(stream, line)) { - double value; - std::stringstream ss; - ss << line; - ss >> value; - - v.contribute(row, value); - row++; + if ('%' == line[0]) + continue; + + int vectors = 0; + ss >> rows; + ss >> vectors; + if (vectors != 1) { + throw Arccore::FatalErrorException("mtx vector reader does not support multiple vectors."); + } + if (distribution.globalSize() != rows) { + throw Arccore::FatalErrorException("mtx vector is not of correct size"); } + break; + } + + auto my_rank = distribution.parallelMng()->commRank(); + auto comm_size = distribution.parallelMng()->commSize(); + size_t line_slice = rows / comm_size; + auto row_start = line_slice * my_rank; + auto row_stop = line_slice * (my_rank + 1); + if (my_rank == comm_size - 1) { + row_stop = rows; + } + + Arccore::Int32 row = 0; + for (row = 0; row < row_start; ++row) { + stream.ignore(4096, '\n'); + } + + while (row < row_stop && std::getline(stream, line)) { + double value; + std::stringstream ss; + ss << line; + ss >> value; + + v.contribute(row, value); + row++; } v.assemble();