diff --git a/CMakeLists.txt b/CMakeLists.txt index 8495be41a..4c2176706 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -50,6 +50,12 @@ set_property(TARGET doctest::doctest PROPERTY INTERFACE_INCLUDE_DIRECTORIES "${D find_package(Eigen3 3.4 REQUIRED) +option(ENABLE_MPI "Enable distributed-memory parallelization with MPI" OFF) +if(ENABLE_MPI) + find_package(MPI REQUIRED COMPONENTS C CXX) +endif() + + # To add netCDF to a target: # target_include_directories(target PUBLIC ${netCDF_INCLUDE_DIR}) # target_link_directories(target PUBLIC ${netCDF_LIB_DIR}) @@ -89,6 +95,10 @@ foreach(compo ${CodeComponents}) endforeach() add_library(nextsimlib SHARED ${NextsimSources}) +target_compile_definitions(nextsimlib + PRIVATE + $<$:USE_MPI> +) target_include_directories(nextsimlib PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}" @@ -97,9 +107,16 @@ target_include_directories(nextsimlib "${netCDF_INCLUDE_DIR}" ) target_link_directories(nextsimlib PUBLIC "${netCDF_LIB_DIR}") -target_link_libraries(nextsimlib PUBLIC Boost::program_options Boost::log "${NSDG_NetCDF_Library}" Eigen3::Eigen) +target_link_libraries(nextsimlib PUBLIC + Boost::program_options Boost::log "${NSDG_NetCDF_Library}" Eigen3::Eigen + $<$:MPI::MPI_C> $<$:MPI::MPI_CXX> + ) add_executable(nextsim ./core/src/main.cpp) +target_compile_definitions(nextsim + PRIVATE + $<$:USE_MPI> +) target_include_directories(nextsim PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}" "${NextsimIncludeDirs}" diff --git a/core/src/CMakeLists.txt b/core/src/CMakeLists.txt index 0b33763b2..3d969e3e6 100644 --- a/core/src/CMakeLists.txt +++ b/core/src/CMakeLists.txt @@ -26,6 +26,10 @@ set(BaseSources "${ModelArrayStructure}/ModelArrayDetails.cpp" ) +set(ParallelNetCDFSources + "${CMAKE_CURRENT_SOURCE_DIR}/ParallelNetcdfFile.cpp" + ) + list(TRANSFORM BaseSources PREPEND "${CMAKE_CURRENT_SOURCE_DIR}/") set(ModuleDir "${CMAKE_CURRENT_SOURCE_DIR}/modules") @@ -66,8 +70,25 @@ set(NextsimSources "${NextsimSources}" "${BaseSources}" "${ModuleSources}" + 
"${ParallelNetCDFSources}" PARENT_SCOPE) +if(ENABLE_MPI) + set(NextsimSources + "${NextsimSources}" + "${BaseSources}" + "${ModuleSources}" + "${ParallelNetCDFSources}" + PARENT_SCOPE) +else() + set(NextsimSources + "${NextsimSources}" + "${BaseSources}" + "${ModuleSources}" + PARENT_SCOPE) +endif() + + set(NextsimIncludeDirs "${NextsimIncludeDirs}" "${ModuleDir}" diff --git a/core/src/Model.cpp b/core/src/Model.cpp index 3d30e619a..14ac37531 100644 --- a/core/src/Model.cpp +++ b/core/src/Model.cpp @@ -2,6 +2,7 @@ * @file Model.cpp * @date 12 Aug 2021 * @author Tim Spain + * @author Kacper Kornet */ #include "include/Model.hpp" @@ -26,6 +27,9 @@ const std::string Model::restartOptionName = "model.init_file"; template <> const std::map Configured::keyMap = { { Model::RESTARTFILE_KEY, Model::restartOptionName }, +#ifdef USE_MPI + { Model::PARTITIONFILE_KEY, "model.partition_file" }, +#endif { Model::STARTTIME_KEY, "model.start" }, { Model::STOPTIME_KEY, "model.stop" }, { Model::RUNLENGTH_KEY, "model.run_length" }, @@ -33,8 +37,16 @@ const std::map Configured::keyMap = { { Model::MISSINGVALUE_KEY, "model.missing_value" }, }; +#ifdef USE_MPI +Model::Model(MPI_Comm comm) +#else Model::Model() +#endif { +#ifdef USE_MPI + m_etadata.setMpiMetadata(comm); +#endif + iterator.setIterant(&modelStep); finalFileName = "restart.nc"; @@ -78,7 +90,17 @@ void Model::configure() modelStep.init(); modelStep.setInitFile(initialFileName); +#ifdef USE_MPI + std::string partitionFile + = Configured::getConfiguration(keyMap.at(PARTITIONFILE_KEY), std::string("partition.nc")); +#endif + +#ifdef USE_MPI + ModelState initialState( + StructureFactory::stateFromFile(initialFileName, partitionFile, m_etadata)); +#else ModelState initialState(StructureFactory::stateFromFile(initialFileName)); +#endif modelStep.setData(pData); modelStep.setMetadata(m_etadata); pData.setData(initialState.data); @@ -115,6 +137,10 @@ Model::HelpMap& Model::getHelpText(HelpMap& map, bool getAll) "The file path to the 
restart file to use for the run." }, { keyMap.at(MISSINGVALUE_KEY), ConfigType::NUMERIC, { "-∞", "∞" }, "-2³⁰⁰", "", "Missing data indicator used for input and output." }, +#ifdef USE_MPI + { keyMap.at(PARTITIONFILE_KEY), ConfigType::STRING, {}, "", "", + "The file path to the file describing MPI domain decomposition to use for the run." }, +#endif }; return map; diff --git a/core/src/ModelMetadata.cpp b/core/src/ModelMetadata.cpp index e928984c6..bd76f9908 100644 --- a/core/src/ModelMetadata.cpp +++ b/core/src/ModelMetadata.cpp @@ -16,4 +16,13 @@ const std::string& ModelMetadata::structureName() const return Module::getImplementation().structureType(); } +#ifdef USE_MPI +void ModelMetadata::setMpiMetadata(MPI_Comm comm) +{ + mpiComm = comm; + MPI_Comm_size(mpiComm, &mpiSize); + MPI_Comm_rank(mpiComm, &mpiMyRank); +} +#endif + } /* namespace Nextsim */ diff --git a/core/src/ParallelNetcdfFile.cpp b/core/src/ParallelNetcdfFile.cpp new file mode 100644 index 000000000..c883f22d0 --- /dev/null +++ b/core/src/ParallelNetcdfFile.cpp @@ -0,0 +1,46 @@ +#include +#include +#include + +#include "include/ParallelNetcdfFile.hpp" + +using namespace netCDF; + +// Not sure why it is needed but let's replicate netCDF::ncFile::open +// in this respect +extern int g_ncid; + +NcFilePar::NcFilePar( + const std::string& filePath, const FileMode fMode, MPI_Comm comm, MPI_Info mpiInfo) +{ + open_par(filePath, fMode, comm, mpiInfo); +} + +void NcFilePar::open_par( + const std::string& filePath, const FileMode fMode, MPI_Comm comm, MPI_Info mpiInfo) +{ + if (!nullObject) + close(); + + switch (fMode) { + case NcFile::write: + ncCheck(nc_open_par(filePath.c_str(), NC_WRITE, comm, mpiInfo, &myId), __FILE__, __LINE__); + break; + case NcFile::read: + ncCheck( + nc_open_par(filePath.c_str(), NC_NOWRITE, comm, mpiInfo, &myId), __FILE__, __LINE__); + break; + case NcFile::newFile: + ncCheck(nc_create_par(filePath.c_str(), NC_NETCDF4 | NC_NOCLOBBER, comm, mpiInfo, &myId), + __FILE__, __LINE__); 
+ break; + case NcFile::replace: + ncCheck(nc_create_par(filePath.c_str(), NC_NETCDF4 | NC_CLOBBER, comm, mpiInfo, &myId), + __FILE__, __LINE__); + break; + } + + g_ncid = myId; + + nullObject = false; +} diff --git a/core/src/RectGridIO.cpp b/core/src/RectGridIO.cpp index 44e038d30..7eeec0a64 100644 --- a/core/src/RectGridIO.cpp +++ b/core/src/RectGridIO.cpp @@ -3,6 +3,7 @@ * * @date Feb 8, 2022 * @author Tim Spain + * @author Kacper Kornet */ #include "include/RectGridIO.hpp" @@ -22,6 +23,9 @@ #include #include +#ifdef USE_MPI +#include +#endif #include #include #include @@ -29,6 +33,26 @@ namespace Nextsim { +#ifdef USE_MPI +void dimensionSetter(const netCDF::NcGroup& dataGroup, const std::string& fieldName, + ModelArray::Type type, ModelMetadata& metadata) +{ + size_t nDims = dataGroup.getVar(fieldName).getDimCount(); + ModelArray::MultiDim dims; + dims.resize(nDims); + dims[0] = metadata.localExtentX; + dims[1] = metadata.localExtentY; + for (size_t d = 2; d < nDims; ++d) { + dims[d] = dataGroup.getVar(fieldName).getDim(d).getSize(); + } + // The dimensions in the netCDF are in the reverse order compared to ModelArray + std::reverse(dims.begin(), dims.end()); + // A special case for Type::Z: use NZLevels for the third dimension + if (type == ModelArray::Type::Z) + dims[2] = NZLevels::get(); + ModelArray::setDimensions(type, dims); +} +#else void dimensionSetter( const netCDF::NcGroup& dataGroup, const std::string& fieldName, ModelArray::Type type) { @@ -45,13 +69,54 @@ void dimensionSetter( dims[2] = NZLevels::get(); ModelArray::setDimensions(type, dims); } +#endif +#ifdef USE_MPI +ModelState RectGridIO::getModelState( + const std::string& filePath, const std::string& partitionFile, ModelMetadata& metadata) +#else ModelState RectGridIO::getModelState(const std::string& filePath) +#endif { ModelState state; +#ifdef USE_MPI + readPartitionData(partitionFile, metadata); + netCDF::NcFilePar ncFile(filePath, netCDF::NcFile::read, metadata.mpiComm); +#else 
netCDF::NcFile ncFile(filePath, netCDF::NcFile::read); +#endif netCDF::NcGroup dataGroup(ncFile.getGroup(IStructure::dataNodeName())); +#ifdef USE_MPI + // Get the sizes of the four types of field + // HField from hice + dimensionSetter(dataGroup, hiceName, ModelArray::Type::H, metadata); + // UField from hice TODO replace with u velocity once it is present + dimensionSetter(dataGroup, hiceName, ModelArray::Type::U, metadata); + // VField from hice TODO replace with v velocity once it is present + dimensionSetter(dataGroup, hiceName, ModelArray::Type::V, metadata); + // ZField from tice + dimensionSetter(dataGroup, ticeName, ModelArray::Type::Z, metadata); + + // Set the origins and extensions for reading 2D data based + // on MPI decomposition + std::vector start(2); + std::vector size(2); + start[0] = metadata.localCornerY; + start[1] = metadata.localCornerX; + size[0] = metadata.localExtentY; + size[1] = metadata.localExtentX; + + state.data[maskName] = ModelArray::HField(); + dataGroup.getVar(maskName).getVar(start, size, &state.data[maskName][0]); + state.data[hiceName] = ModelArray::HField(); + dataGroup.getVar(hiceName).getVar(start, size, &state.data[hiceName][0]); + state.data[ciceName] = ModelArray::HField(); + dataGroup.getVar(ciceName).getVar(start, size, &state.data[ciceName][0]); + state.data[hsnowName] = ModelArray::HField(); + dataGroup.getVar(hsnowName).getVar(start, size, &state.data[hsnowName][0]); + +#else // Get the sizes of the four types of field // HField from hice dimensionSetter(dataGroup, hiceName, ModelArray::Type::H); @@ -70,6 +135,9 @@ ModelState RectGridIO::getModelState(const std::string& filePath) dataGroup.getVar(ciceName).getVar(&state.data[ciceName][0]); state.data[hsnowName] = ModelArray::HField(); dataGroup.getVar(hsnowName).getVar(&state.data[hsnowName][0]); +#endif + // Z direction is outside MPI ifdef as the domain is never decomposed in this direction + // Since the ZFierld might not have the same dimensions as the tice 
field // in the file, a little more work is required. state.data[ticeName] = ModelArray::ZField(); @@ -82,10 +150,39 @@ ModelState RectGridIO::getModelState(const std::string& filePath) return state; } +#ifdef USE_MPI +void RectGridIO::readPartitionData(const std::string& partitionFile, ModelMetadata& metadata) +{ + static const std::string bboxName = "bounding_boxes"; + + netCDF::NcFile ncFile(partitionFile, netCDF::NcFile::read); + int sizes = ncFile.getDim("L").getSize(); + int nBoxes = ncFile.getDim("P").getSize(); + if (nBoxes != metadata.mpiSize) { + std::string errorMsg = "Number of MPI ranks " + std::to_string(metadata.mpiSize) + " <> " + + std::to_string(nBoxes) + "\n"; + throw std::runtime_error(errorMsg); + } + metadata.globalExtentX = ncFile.getDim("globalX").getSize(); + metadata.globalExtentY = ncFile.getDim("globalY").getSize(); + netCDF::NcGroup bboxGroup(ncFile.getGroup(bboxName)); + std::vector index(1, metadata.mpiMyRank); + bboxGroup.getVar("global_x").getVar(index, &metadata.localCornerX); + bboxGroup.getVar("global_y").getVar(index, &metadata.localCornerY); + bboxGroup.getVar("local_extent_x").getVar(index, &metadata.localExtentX); + bboxGroup.getVar("local_extent_y").getVar(index, &metadata.localExtentY); + ncFile.close(); +} +#endif + void RectGridIO::dumpModelState(const ModelState& state, const ModelMetadata& metadata, const std::string& filePath, bool isRestart) const { +#ifdef USE_MPI + netCDF::NcFilePar ncFile(filePath, netCDF::NcFile::replace, metadata.mpiComm); +#else netCDF::NcFile ncFile(filePath, netCDF::NcFile::replace); +#endif CommonRestartMetadata::writeStructureType(ncFile, metadata); netCDF::NcGroup metaGroup = ncFile.addGroup(IStructure::metadataNodeName()); @@ -102,22 +199,47 @@ void RectGridIO::dumpModelState(const ModelState& state, const ModelMetadata& me // Create the dimension data, since it has to be in the same group as the // data or the parent group +#ifdef USE_MPI + netCDF::NcDim xDim = 
dataGroup.addDim(dimensionNames[0], metadata.globalExtentX); + netCDF::NcDim yDim = dataGroup.addDim(dimensionNames[1], metadata.globalExtentY); +#else netCDF::NcDim xDim = dataGroup.addDim(dimensionNames[0], nx); netCDF::NcDim yDim = dataGroup.addDim(dimensionNames[1], ny); - std::vector dims2 = { yDim, xDim }; +#endif netCDF::NcDim zDim = dataGroup.addDim(dimensionNames[2], nz); + std::vector dims2 = { yDim, xDim }; std::vector dims3 = { zDim, yDim, xDim }; +#ifdef USE_MPI + // Set the origins and extensions for reading 3D data based + // on MPI decomposition + std::vector start3 = { 0, static_cast(metadata.localCornerY), + static_cast(metadata.localCornerX) }; + std::vector size3 = { static_cast(nz), + static_cast(metadata.localExtentY), static_cast(metadata.localExtentX) }; + // Set the origins and extensions for reading 2D data based + // on MPI decomposition + std::vector start2(start3.begin() + 1, start3.end()); + std::vector size2(size3.begin() + 1, size3.end()); +#endif for (const auto entry : state.data) { const std::string& name = entry.first; if (entry.second.getType() == ModelArray::Type::H && entry.second.trueSize() > 0) { netCDF::NcVar var(dataGroup.addVar(name, netCDF::ncDouble, dims2)); var.putAtt(mdiName, netCDF::ncDouble, MissingData::value); +#ifdef USE_MPI + var.putVar(start2, size2, entry.second.getData()); +#else var.putVar(entry.second.getData()); +#endif } else if (entry.second.getType() == ModelArray::Type::Z && entry.second.trueSize() > 0) { netCDF::NcVar var(dataGroup.addVar(name, netCDF::ncDouble, dims3)); var.putAtt(mdiName, netCDF::ncDouble, MissingData::value); +#ifdef USE_MPI + var.putVar(start3, size3, entry.second.getData()); +#else var.putVar(entry.second.getData()); +#endif } } diff --git a/core/src/StructureFactory.cpp b/core/src/StructureFactory.cpp index da50e67ca..2c81b9410 100644 --- a/core/src/StructureFactory.cpp +++ b/core/src/StructureFactory.cpp @@ -3,6 +3,7 @@ * * @date Jan 18, 2022 * @author Tim Spain + * @author 
Kacper Kornet */ #include "include/StructureFactory.hpp" @@ -38,7 +39,12 @@ std::string structureNameFromFile(const std::string& filePath) return structureName; } +#ifdef USE_MPI +ModelState StructureFactory::stateFromFile( + const std::string& filePath, const std::string& partitionFile, ModelMetadata& metadata) +#else ModelState StructureFactory::stateFromFile(const std::string& filePath) +#endif { std::string structureName = structureNameFromFile(filePath); // TODO There must be a better way @@ -46,12 +52,20 @@ ModelState StructureFactory::stateFromFile(const std::string& filePath) Module::setImplementation("RectangularGrid"); RectangularGrid gridIn; gridIn.setIO(new RectGridIO(gridIn)); +#ifdef USE_MPI + return gridIn.getModelState(filePath, partitionFile, metadata); +#else return gridIn.getModelState(filePath); +#endif } else if (ParametricGrid::structureName == structureName) { Module::setImplementation("ParametricGrid"); ParametricGrid gridIn; gridIn.setIO(new ParaGridIO(gridIn)); +#ifdef USE_MPI + return gridIn.getModelState(filePath, partitionFile, metadata); +#else return gridIn.getModelState(filePath); +#endif } else { throw std::invalid_argument( std::string("fileFromName: structure not implemented: ") + structureName); diff --git a/core/src/include/Model.hpp b/core/src/include/Model.hpp index 75fcfbd1f..07ecdba98 100644 --- a/core/src/include/Model.hpp +++ b/core/src/include/Model.hpp @@ -2,6 +2,7 @@ * @file Model.hpp * @date 12 Aug 2021 * @author Tim Spain + * @author Kacper Kornet */ #ifndef MODEL_HPP @@ -23,13 +24,20 @@ namespace Nextsim { //! A class that encapsulates the whole of the model class Model : public Configured { public: +#ifdef USE_MPI + Model(MPI_Comm comm); +#else Model(); // TODO add arguments to pass the desired // environment and configuration to the model +#endif ~Model(); // Finalize the model. Collect data and so on. 
void configure() override; enum { RESTARTFILE_KEY, +#ifdef USE_MPI + PARTITIONFILE_KEY, +#endif STARTTIME_KEY, STOPTIME_KEY, RUNLENGTH_KEY, diff --git a/core/src/include/ModelMetadata.hpp b/core/src/include/ModelMetadata.hpp index a98110c85..7a5a0081c 100644 --- a/core/src/include/ModelMetadata.hpp +++ b/core/src/include/ModelMetadata.hpp @@ -13,6 +13,10 @@ #include +#ifdef USE_MPI +#include +#endif + namespace Nextsim { class CommonRestartMetadata; @@ -51,6 +55,15 @@ class ModelMetadata { // The metadata writer should be a friend friend CommonRestartMetadata; +#ifdef USE_MPI + void setMpiMetadata(MPI_Comm comm); + + MPI_Comm mpiComm; + int mpiSize = 0; + int mpiMyRank = -1; + int localCornerX, localCornerY, localExtentX, localExtentY, globalExtentX, globalExtentY; +#endif + private: TimePoint m_time; ConfigMap m_config; diff --git a/core/src/include/ParallelNetcdfFile.hpp b/core/src/include/ParallelNetcdfFile.hpp new file mode 100644 index 000000000..42d067cad --- /dev/null +++ b/core/src/include/ParallelNetcdfFile.hpp @@ -0,0 +1,16 @@ +#include +#include + +namespace netCDF { + +class NcFilePar : public netCDF::NcFile { +public: + NcFilePar() = default; + + NcFilePar(const std::string& filePath, const FileMode fMode, MPI_Comm comm, + MPI_Info info = MPI_INFO_NULL); + + void open_par(const std::string& path, const FileMode fMode, MPI_Comm comm, MPI_Info info); +}; + +} diff --git a/core/src/include/RectGridIO.hpp b/core/src/include/RectGridIO.hpp index 9a8744b32..1aee3ec90 100644 --- a/core/src/include/RectGridIO.hpp +++ b/core/src/include/RectGridIO.hpp @@ -3,6 +3,7 @@ * * @date Feb 8, 2022 * @author Tim Spain + * @author Kacper Kornet */ #ifndef RECTGRIDIO_HPP @@ -22,13 +23,20 @@ class RectGridIO : public RectangularGrid::IRectGridIO { typedef RectangularGrid::GridDimensions GridDimensions; +#ifdef USE_MPI + ModelState getModelState(const std::string& filePath, const std::string& partitionFile, + ModelMetadata& metadata) override; +#else ModelState 
getModelState(const std::string& filePath) override; +#endif void dumpModelState(const ModelState& state, const ModelMetadata& metadata, const std::string& filePath, bool isRestart) const override; private: RectGridIO() = default; + + void readPartitionData(const std::string& partitionFile, ModelMetadata& metadata); }; } /* namespace Nextsim */ diff --git a/core/src/include/StructureFactory.hpp b/core/src/include/StructureFactory.hpp index 69e429fbf..f94b72acf 100644 --- a/core/src/include/StructureFactory.hpp +++ b/core/src/include/StructureFactory.hpp @@ -3,6 +3,7 @@ * * @date Jan 18, 2022 * @author Tim Spain + * @author Kacper Kornet */ #ifndef STRUCTUREFACTORY_HPP @@ -19,12 +20,24 @@ namespace Nextsim { class StructureFactory { public: +#ifdef USE_MPI + /*! + * @brief Returns the ModelState of the named restart file. + * + * @param filePath the name of the file to be read. + * @param partitionFile name of file with data for MPI domain decomposition + * @param metadata ModelMedata to be used to get MPI parameters + */ + static ModelState stateFromFile( + const std::string& filePath, const std::string& partitionFile, ModelMetadata& metadata); +#else /*! * @brief Returns the ModelState of the named restart file. * * @param filePath the name of the file to be read. */ static ModelState stateFromFile(const std::string& filePath); +#endif /*! 
* @brief Takes a ModelState and a template file name to write the state diff --git a/core/src/main.cpp b/core/src/main.cpp index b4224acf1..bb94b4044 100644 --- a/core/src/main.cpp +++ b/core/src/main.cpp @@ -2,9 +2,13 @@ * @file main.cpp * @date 11 Aug 2021 * @author Tim Spain + * @author Kacper Kornet */ #include <iostream> +#ifdef USE_MPI +#include <mpi.h> +#endif #include "include/CommandLineParser.hpp" #include "include/ConfigurationHelpPrinter.hpp" @@ -15,6 +19,10 @@ int main(int argc, char* argv[]) { +#ifdef USE_MPI + MPI_Init(&argc, &argv); +#endif // USE_MPI + // Pass the command line to Configurator to handle Nextsim::Configurator::setCommandLine(argc, argv); // Extract any config files defined on the command line @@ -40,12 +48,19 @@ int main(int argc, char* argv[]) Nextsim::ConfigurationHelpPrinter::print(std::cout, map, cmdLine.configHelp()); } else { // Construct the Model +#ifdef USE_MPI + Nextsim::Model model(MPI_COMM_WORLD); +#else Nextsim::Model model; +#endif // Apply the model configuration model.configure(); // Run the Model model.run(); } +#ifdef USE_MPI + MPI_Finalize(); +#endif return 0; } diff --git a/core/src/modules/SimpleOutput.cpp b/core/src/modules/SimpleOutput.cpp index f7d1c47fd..996f6390e 100644 --- a/core/src/modules/SimpleOutput.cpp +++ b/core/src/modules/SimpleOutput.cpp @@ -20,6 +20,8 @@ void SimpleOutput::outputState(const ModelMetadata& meta) std::stringstream startStream; startStream << meta.time(); std::string timeFileName = m_filePrefix + "." 
+ startStream.str() + ".nc"; + // Some MPI-IO implementations do not like colons in file names + std::replace(timeFileName.begin(), timeFileName.end(), ':', '_'); Logged::info("Outputting " + std::to_string(externalNames.size()) + " fields to " + timeFileName + "\n"); diff --git a/core/src/modules/include/IStructure.hpp b/core/src/modules/include/IStructure.hpp index 015476867..b0592ec8e 100644 --- a/core/src/modules/include/IStructure.hpp +++ b/core/src/modules/include/IStructure.hpp @@ -3,6 +3,7 @@ * * @date Dec 17, 2021 * @author Tim Spain + * @author Kacper Kornet */ #ifndef ISTRUCTURE_HPP @@ -42,7 +43,13 @@ class IStructure { /*! * @brief Returns the ModelState stored in the file */ +#ifdef USE_MPI + virtual ModelState getModelState( + const std::string& filePath, const std::string& filePartition, ModelMetadata& metadata) + = 0; +#else virtual ModelState getModelState(const std::string& filePath) = 0; +#endif //! Returns the structure name that this class will process virtual const std::string& structureType() const { return processedStructureName; } diff --git a/core/src/modules/include/ParametricGrid.hpp b/core/src/modules/include/ParametricGrid.hpp index ee7af10ab..bf7687582 100644 --- a/core/src/modules/include/ParametricGrid.hpp +++ b/core/src/modules/include/ParametricGrid.hpp @@ -3,6 +3,7 @@ * * @date Oct 24, 2022 * @author Tim Spain + * @author Kacper Kornet */ #ifndef PARAMETRICGRID_HPP @@ -34,10 +35,18 @@ class ParametricGrid : public IStructure { } // Read/write override functions +#ifdef USE_MPI + ModelState getModelState(const std::string& filePath, const std::string& partitionFile, + ModelMetadata& metadata) override + { + return pio ? pio->getModelState(filePath) : ModelState(); + } +#else ModelState getModelState(const std::string& filePath) override { return pio ? 
pio->getModelState(filePath) : ModelState(); } +#endif void dumpModelState(const ModelState& state, const ModelMetadata& metadata, const std::string& filePath, bool isRestart = false) const override diff --git a/core/src/modules/include/RectangularGrid.hpp b/core/src/modules/include/RectangularGrid.hpp index 26e8e0686..bb530eb4d 100644 --- a/core/src/modules/include/RectangularGrid.hpp +++ b/core/src/modules/include/RectangularGrid.hpp @@ -3,6 +3,7 @@ * * @date Feb 7, 2022 * @author Tim Spain + * @author Kacper Kornet */ #ifndef RECTANGULARGRID_HPP @@ -47,10 +48,18 @@ class RectangularGrid : public IStructure { } // Read/write override functions +#ifdef USE_MPI + ModelState getModelState(const std::string& filePath, const std::string& partitionFile, + ModelMetadata& metadata) override + { + return pio ? pio->getModelState(filePath, partitionFile, metadata) : ModelState(); + } +#else ModelState getModelState(const std::string& filePath) override { return pio ? pio->getModelState(filePath) : ModelState(); } +#endif void dumpModelState( const ModelState& state, const ModelMetadata& metadata, const std::string& filePath, bool isRestart = false) const override @@ -77,7 +86,13 @@ class RectangularGrid : public IStructure { } virtual ~IRectGridIO() = default; +#ifdef USE_MPI + virtual ModelState getModelState( + const std::string& filePath, const std::string& partitionFile, ModelMetadata& metadata) + = 0; +#else virtual ModelState getModelState(const std::string& filePath) = 0; +#endif /*! * @brief Dumps the given ModelState to the given file path. 
diff --git a/core/test/CMakeLists.txt b/core/test/CMakeLists.txt index d9d663ff7..1a237b228 100644 --- a/core/test/CMakeLists.txt +++ b/core/test/CMakeLists.txt @@ -41,13 +41,25 @@ target_link_libraries(testModelArray PRIVATE doctest::doctest Eigen3::Eigen) set(MODEL_INCLUDE_DIR "../../core/src/discontinuousgalerkin") -add_executable(testRectGrid "RectGrid_test.cpp") -target_include_directories(testRectGrid PRIVATE ${MODEL_INCLUDE_DIR}) -target_link_libraries(testRectGrid PRIVATE nextsimlib doctest::doctest) - -add_executable(testParaGrid "ParaGrid_test.cpp") -target_include_directories(testParaGrid PRIVATE ${MODEL_INCLUDE_DIR}) -target_link_libraries(testParaGrid PRIVATE nextsimlib doctest::doctest) +if(ENABLE_MPI) + add_executable(testRectGrid_MPI "RectGrid_test.cpp" "MainMPI.cpp") + target_compile_definitions(testRectGrid_MPI PRIVATE USE_MPI) + target_include_directories(testRectGrid_MPI PRIVATE ${MODEL_INCLUDE_DIR}) + target_link_libraries(testRectGrid_MPI PRIVATE nextsimlib doctest::doctest) +else() + add_executable(testRectGrid "RectGrid_test.cpp") + target_include_directories(testRectGrid PRIVATE ${MODEL_INCLUDE_DIR}) + target_link_libraries(testRectGrid PRIVATE nextsimlib doctest::doctest) +endif() + +if(ENABLE_MPI) + message(WARNING "testParaGrid has been temporarily disabled when running with MPI enabled") +else() + add_executable(testParaGrid "ParaGrid_test.cpp") + target_compile_definitions(testParaGrid PRIVATE TEST_FILE_SOURCE=${CMAKE_CURRENT_SOURCE_DIR}) + target_include_directories(testParaGrid PRIVATE ${MODEL_INCLUDE_DIR}) + target_link_libraries(testParaGrid PRIVATE nextsimlib doctest::doctest) +endif() add_executable(testModelComponent "ModelComponent_test.cpp") target_include_directories(testModelComponent PRIVATE ${MODEL_INCLUDE_DIR}) @@ -62,9 +74,13 @@ add_executable(testPrognosticData "PrognosticData_test.cpp" "DynamicsModuleForPD target_include_directories(testPrognosticData PRIVATE ${PHYSICS_INCLUDE_DIRS} ${MODEL_INCLUDE_DIR}) 
target_link_libraries(testPrognosticData PRIVATE nextsimlib doctest::doctest) -add_executable(testConfigOutput "ConfigOutput_test.cpp") -target_include_directories(testConfigOutput PRIVATE ${MODEL_INCLUDE_DIR}) -target_link_libraries(testConfigOutput PRIVATE nextsimlib doctest::doctest) +if(ENABLE_MPI) + message(WARNING "testConfigOutput has been temporarily disabled when running with MPI enabled") +else() + add_executable(testConfigOutput "ConfigOutput_test.cpp") + target_include_directories(testConfigOutput PRIVATE ${MODEL_INCLUDE_DIR}) + target_link_libraries(testConfigOutput PRIVATE nextsimlib doctest::doctest) +endif() add_executable(testMonthlyCubicBSpline "MonthlyCubicBSpline_test.cpp" diff --git a/core/test/MainMPI.cpp b/core/test/MainMPI.cpp new file mode 100644 index 000000000..6d080b76a --- /dev/null +++ b/core/test/MainMPI.cpp @@ -0,0 +1,19 @@ +#define DOCTEST_CONFIG_IMPLEMENT + +#include + +int main(int argc, char** argv) { + doctest::mpi_init_thread(argc,argv,MPI_THREAD_MULTIPLE); + + doctest::Context ctx; + ctx.setOption("reporters", "MpiConsoleReporter"); + ctx.setOption("reporters", "MpiFileReporter"); + ctx.setOption("force-colors", true); + ctx.applyCommandLine(argc, argv); + + int test_result = ctx.run(); + + doctest::mpi_finalize(); + + return test_result; +} diff --git a/core/test/RectGrid_test.cpp b/core/test/RectGrid_test.cpp index 6bfec04e4..be6126331 100644 --- a/core/test/RectGrid_test.cpp +++ b/core/test/RectGrid_test.cpp @@ -5,8 +5,13 @@ * @author Tim Spain */ +#ifdef USE_MPI +#include +#else #define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN #include +#endif + #include "include/CommonRestartMetadata.hpp" #include "include/NZLevels.hpp" @@ -18,10 +23,16 @@ #include const std::string filename = "RectGrid_test.nc"; +const std::string partition_filename = "partition_metadata_1.nc"; namespace Nextsim { TEST_SUITE_BEGIN("RectGrid"); +#ifdef USE_MPI +// Number of ranks should not be hardcoded here +MPI_TEST_CASE("Write and read a ModelState-based 
RectGrid restart file", 1) +#else TEST_CASE("Write and read a ModelState-based RectGrid restart file") +#endif { RectangularGrid grid; grid.setIO(new RectGridIO(grid)); @@ -68,6 +79,15 @@ TEST_CASE("Write and read a ModelState-based RectGrid restart file") ModelMetadata metadata; metadata.setTime(TimePoint("2000-01-01T00:00:00Z")); +#ifdef USE_MPI + metadata.setMpiMetadata(test_comm); + metadata.globalExtentX = nx; + metadata.globalExtentY = ny; + metadata.localCornerX = 0; + metadata.localCornerY = 0; + metadata.localExtentX = nx; + metadata.localExtentY = ny; +#endif grid.dumpModelState(state, metadata, filename); ModelArray::setDimensions(ModelArray::Type::H, { 1, 1 }); @@ -77,7 +97,11 @@ TEST_CASE("Write and read a ModelState-based RectGrid restart file") size_t targetY = 7; gridIn.setIO(new RectGridIO(grid)); +#ifdef USE_MPI + ModelState ms = gridIn.getModelState(filename, partition_filename, metadata); +#else ModelState ms = gridIn.getModelState(filename); +#endif REQUIRE(ModelArray::dimensions(ModelArray::Type::H)[0] == nx); REQUIRE(ModelArray::dimensions(ModelArray::Type::H)[1] == ny); diff --git a/lib/doctest/doctest.h b/lib/doctest/doctest.h index c158b5b94..5c754cde0 100644 --- a/lib/doctest/doctest.h +++ b/lib/doctest/doctest.h @@ -4,7 +4,7 @@ // // doctest.h - the lightest feature-rich C++ single-header testing framework for unit tests and TDD // -// Copyright (c) 2016-2021 Viktor Kirilov +// Copyright (c) 2016-2023 Viktor Kirilov // // Distributed under the MIT Software License // See accompanying file LICENSE.txt or copy at @@ -48,7 +48,7 @@ #define DOCTEST_VERSION_MAJOR 2 #define DOCTEST_VERSION_MINOR 4 -#define DOCTEST_VERSION_PATCH 10 +#define DOCTEST_VERSION_PATCH 11 // util we need here #define DOCTEST_TOSTR_IMPL(x) #x @@ -1313,9 +1313,9 @@ namespace detail { template struct decay_array { using type = T*; }; template struct decay_array { using type = T*; }; - template struct not_char_pointer { static DOCTEST_CONSTEXPR value = 1; }; - template<> 
struct not_char_pointer { static DOCTEST_CONSTEXPR value = 0; }; - template<> struct not_char_pointer { static DOCTEST_CONSTEXPR value = 0; }; + template struct not_char_pointer { static DOCTEST_CONSTEXPR int value = 1; }; + template<> struct not_char_pointer { static DOCTEST_CONSTEXPR int value = 0; }; + template<> struct not_char_pointer { static DOCTEST_CONSTEXPR int value = 0; }; template struct can_use_op : public not_char_pointer::type> {}; #endif // DOCTEST_CONFIG_TREAT_CHAR_STAR_AS_STRING @@ -5906,7 +5906,22 @@ namespace { testCaseData.addFailure(rb.m_decomp.c_str(), assertString(rb.m_at), os.str()); } - void log_message(const MessageData&) override {} + void log_message(const MessageData& mb) override { + if(mb.m_severity & assertType::is_warn) // report only failures + return; + + DOCTEST_LOCK_MUTEX(mutex) + + std::ostringstream os; + os << skipPathFromFilename(mb.m_file) << (opt.gnu_file_line ? ":" : "(") + << line(mb.m_line) << (opt.gnu_file_line ? ":" : "):") << std::endl; + + os << mb.m_string.c_str() << "\n"; + log_contexts(os); + + testCaseData.addFailure(mb.m_string.c_str(), + mb.m_severity & assertType::is_check ? 
"FAIL_CHECK" : "FAIL", os.str()); + } void test_case_skipped(const TestCaseData&) override {} @@ -6246,9 +6261,9 @@ namespace { separator_to_stream(); s << std::dec; - auto totwidth = int(std::ceil(log10((std::max(p.numTestCasesPassingFilters, static_cast(p.numAsserts))) + 1))); - auto passwidth = int(std::ceil(log10((std::max(p.numTestCasesPassingFilters - p.numTestCasesFailed, static_cast(p.numAsserts - p.numAssertsFailed))) + 1))); - auto failwidth = int(std::ceil(log10((std::max(p.numTestCasesFailed, static_cast(p.numAssertsFailed))) + 1))); + auto totwidth = int(std::ceil(log10(static_cast(std::max(p.numTestCasesPassingFilters, static_cast(p.numAsserts))) + 1))); + auto passwidth = int(std::ceil(log10(static_cast(std::max(p.numTestCasesPassingFilters - p.numTestCasesFailed, static_cast(p.numAsserts - p.numAssertsFailed))) + 1))); + auto failwidth = int(std::ceil(log10(static_cast(std::max(p.numTestCasesFailed, static_cast(p.numAssertsFailed))) + 1))); const bool anythingFailed = p.numTestCasesFailed > 0 || p.numAssertsFailed > 0; s << Color::Cyan << "[doctest] " << Color::None << "test cases: " << std::setw(totwidth) << p.numTestCasesPassingFilters << " | " diff --git a/lib/doctest/extensions/doctest_mpi.h b/lib/doctest/extensions/doctest_mpi.h new file mode 100644 index 000000000..ce9695260 --- /dev/null +++ b/lib/doctest/extensions/doctest_mpi.h @@ -0,0 +1,169 @@ +#ifndef DOCTEST_MPI_H +#define DOCTEST_MPI_H + +#ifdef DOCTEST_CONFIG_IMPLEMENT + +#include "doctest/extensions/mpi_sub_comm.h" +#include "mpi_reporter.h" +#include + +namespace doctest { + +// Each time a MPI_TEST_CASE is executed on N procs, +// we need a sub-communicator of N procs to execute it. 
+// It is then registered here and can be re-used +// by other tests that requires a sub-comm of the same size +std::unordered_map sub_comms_by_size; + +// Record if at least one MPI_TEST_CASE was registered "skipped" +// because there is not enought procs to execute it +int nb_test_cases_skipped_insufficient_procs = 0; + + +std::string thread_level_to_string(int thread_lvl); +int mpi_init_thread(int argc, char *argv[], int required_thread_support); +void mpi_finalize(); + + +// Can be safely called before MPI_Init() +// This is needed for MPI_TEST_CASE because we use doctest::skip() +// to prevent execution of tests where there is not enough procs, +// but doctest::skip() is called during test registration, that is, before main(), and hence before MPI_Init() +int mpi_comm_world_size() { + #if defined(OPEN_MPI) + const char* size_str = std::getenv("OMPI_COMM_WORLD_SIZE"); + #elif defined(I_MPI_VERSION) || defined(MPI_VERSION) // Intel MPI + MPICH (at least) + const char* size_str = std::getenv("PMI_SIZE"); // see https://community.intel.com/t5/Intel-oneAPI-HPC-Toolkit/Environment-variables-defined-by-intel-mpirun/td-p/1096703 + #else + #error "Unknown MPI implementation: please submit an issue or a PR to doctest. Meanwhile, you can look at the output of e.g. 
`mpirun -np 3 env` to search for an environnement variable that contains the size of MPI_COMM_WORLD and extend this code accordingly" + #endif + if (size_str==nullptr) return 1; // not launched with mpirun/mpiexec, so assume only one process + return std::stoi(size_str); +} + +// Record size of MPI_COMM_WORLD with mpi_comm_world_size() +int world_size_before_init = mpi_comm_world_size(); + + +std::string thread_level_to_string(int thread_lvl) { + switch (thread_lvl) { + case MPI_THREAD_SINGLE: return "MPI_THREAD_SINGLE"; + case MPI_THREAD_FUNNELED: return "MPI_THREAD_FUNNELED"; + case MPI_THREAD_SERIALIZED: return "MPI_THREAD_SERIALIZED"; + case MPI_THREAD_MULTIPLE: return "MPI_THREAD_MULTIPLE"; + default: return "Invalid MPI thread level"; + } +} +int mpi_init_thread(int argc, char *argv[], int required_thread_support) { + int provided_thread_support; + MPI_Init_thread(&argc, &argv, required_thread_support, &provided_thread_support); + + int world_size; + MPI_Comm_size(MPI_COMM_WORLD,&world_size); + if (world_size_before_init != world_size) { + DOCTEST_INTERNAL_ERROR( + "doctest found "+std::to_string(world_size_before_init)+" MPI processes before `MPI_Init_thread`," + " but MPI_COMM_WORLD is actually of size "+std::to_string(world_size)+".\n" + "This is most likely due to your MPI implementation not being well supported by doctest. 
Please report this issue on GitHub" + ); + } + + if (provided_thread_support!=required_thread_support) { + std::cout << + "WARNING: " + thread_level_to_string(required_thread_support) + " was asked, " + + "but only " + thread_level_to_string(provided_thread_support) + " is provided by the MPI library\n"; + } + return provided_thread_support; +} +void mpi_finalize() { + // We need to destroy all created sub-communicators before calling MPI_Finalize() + doctest::sub_comms_by_size.clear(); + MPI_Finalize(); +} + +} // doctest + +#else // DOCTEST_CONFIG_IMPLEMENT + +#include "doctest/extensions/mpi_sub_comm.h" +#include +#include + +namespace doctest { + +extern std::unordered_map sub_comms_by_size; +extern int nb_test_cases_skipped_insufficient_procs; +extern int world_size_before_init; +int mpi_comm_world_size(); + +int mpi_init_thread(int argc, char *argv[], int required_thread_support); +void mpi_finalize(); + +template +void execute_mpi_test_case(F func) { + auto it = sub_comms_by_size.find(nb_procs); + if (it==end(sub_comms_by_size)) { + bool was_emplaced = false; + std::tie(it,was_emplaced) = sub_comms_by_size.emplace(std::make_pair(nb_procs,mpi_sub_comm(nb_procs))); + assert(was_emplaced); + } + const mpi_sub_comm& sub = it->second; + if (sub.comm != MPI_COMM_NULL) { + func(sub.rank,nb_procs,sub.comm,std::integral_constant{}); + }; +} + +inline bool +insufficient_procs(int test_nb_procs) { + static const int world_size = mpi_comm_world_size(); + bool insufficient = test_nb_procs>world_size; + if (insufficient) { + ++nb_test_cases_skipped_insufficient_procs; + } + return insufficient; +} + +} // doctest + + +#define DOCTEST_MPI_GEN_ASSERTION(rank_to_test, assertion, ...) 
\ + static_assert(rank_to_test); \ + TEST_CASE(name * doctest::description("MPI_TEST_CASE") * doctest::skip(doctest::insufficient_procs(nb_procs))) { \ + doctest::execute_mpi_test_case(func); \ + } \ + static void func(DOCTEST_UNUSED int test_rank, DOCTEST_UNUSED int test_nb_procs, DOCTEST_UNUSED MPI_Comm test_comm, DOCTEST_UNUSED std::integral_constant test_nb_procs_as_int_constant) + // DOC: test_rank, test_nb_procs, and test_comm are available UNDER THESE SPECIFIC NAMES in the body of the unit test + // DOC: test_nb_procs_as_int_constant is equal to test_nb_procs, but as a compile time value + // (used in CHECK-like macros to assert the checked rank exists) + +#define DOCTEST_MPI_TEST_CASE(name,nb_procs) \ + DOCTEST_CREATE_MPI_TEST_CASE(name,nb_procs,DOCTEST_ANONYMOUS(DOCTEST_MPI_FUNC)) + + +// == SHORT VERSIONS OF THE MACROS +#if !defined(DOCTEST_CONFIG_NO_SHORT_MACRO_NAMES) +#define MPI_WARN DOCTEST_MPI_WARN +#define MPI_CHECK DOCTEST_MPI_CHECK +#define MPI_REQUIRE DOCTEST_MPI_REQUIRE +#define MPI_WARN_FALSE DOCTEST_MPI_WARN_FALSE +#define MPI_CHECK_FALSE DOCTEST_MPI_CHECK_FALSE +#define MPI_REQUIRE_FALSE DOCTEST_MPI_REQUIRE_FALSE + +#define MPI_TEST_CASE DOCTEST_MPI_TEST_CASE +#endif // DOCTEST_CONFIG_NO_SHORT_MACRO_NAMES + + +#endif // DOCTEST_CONFIG_IMPLEMENT + +#endif // DOCTEST_MPI_H diff --git a/lib/doctest/extensions/doctest_util.h b/lib/doctest/extensions/doctest_util.h new file mode 100644 index 000000000..3e63ebc97 --- /dev/null +++ b/lib/doctest/extensions/doctest_util.h @@ -0,0 +1,37 @@ +// +// doctest_util.h - an accompanying extensions header to the main doctest.h header +// +// Copyright (c) 2016-2023 Viktor Kirilov +// +// Distributed under the MIT Software License +// See accompanying file LICENSE.txt or copy at +// https://opensource.org/licenses/MIT +// +// The documentation can be found at the library's page: +// https://github.com/doctest/doctest/blob/master/doc/markdown/readme.md +// + +#ifndef DOCTEST_UTIL_H +#define DOCTEST_UTIL_H + 
+#ifndef DOCTEST_LIBRARY_INCLUDED +#include "../doctest.h" +#endif + +#include +#include +#include + +namespace doctest { + + inline void applyCommandLine(doctest::Context& ctx, const std::vector& args) { + auto doctest_args = std::make_unique(args.size()); + for (size_t i = 0; i < args.size(); ++i) { + doctest_args[i] = args[i].c_str(); + } + ctx.applyCommandLine(args.size(), doctest_args.get()); + } + +} // namespace doctest + +#endif // DOCTEST_UTIL_H diff --git a/lib/doctest/extensions/mpi_reporter.h b/lib/doctest/extensions/mpi_reporter.h new file mode 100644 index 000000000..19a43e00d --- /dev/null +++ b/lib/doctest/extensions/mpi_reporter.h @@ -0,0 +1,271 @@ +#ifndef DOCTEST_MPI_REPORTER_H +#define DOCTEST_MPI_REPORTER_H + +// #include +#include +#include +#include "mpi.h" + + +#include +#include + +namespace doctest { + +extern int nb_test_cases_skipped_insufficient_procs; +int mpi_comm_world_size(); + +namespace { + +// https://stackoverflow.com/a/11826666/1583122 +struct NullBuffer : std::streambuf { + int overflow(int c) { return c; } +}; +class NullStream : public std::ostream { + public: + NullStream() + : std::ostream(&nullBuff) + {} + private: + NullBuffer nullBuff = {}; +}; +static NullStream nullStream; + + +/* \brief Extends the ConsoleReporter of doctest + * Each process writes its results to its own file + * Intended to be used when a test assertion fails and the user wants to know exactly what happens on which process + */ +struct MpiFileReporter : public ConsoleReporter { + std::ofstream logfile_stream = {}; + + MpiFileReporter(const ContextOptions& co) + : ConsoleReporter(co,logfile_stream) + { + int rank = 0; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + + std::string logfile_name = "doctest_" + std::to_string(rank) + ".log"; + + logfile_stream = std::ofstream(logfile_name.c_str(), std::fstream::out); + } +}; + + +/* \brief Extends the ConsoleReporter of doctest + * Allows to manage the execution of tests in a parallel framework + * All results 
are collected on rank 0 + */ +struct MpiConsoleReporter : public ConsoleReporter { +private: + static std::ostream& replace_by_null_if_not_rank_0(std::ostream* os) { + int rank = 0; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + if (rank==0) { + return *os; + } else { + return nullStream; + } + } + std::vector> m_failure_str_queue = {}; +public: + MpiConsoleReporter(const ContextOptions& co) + : ConsoleReporter(co,replace_by_null_if_not_rank_0(co.cout)) + {} + + std::string file_line_to_string(const char* file, int line, + const char* tail = ""){ + std::stringstream ss; + ss << skipPathFromFilename(file) + << (opt.gnu_file_line ? ":" : "(") + << (opt.no_line_numbers ? 0 : line) // 0 or the real num depending on the option + << (opt.gnu_file_line ? ":" : "):") << tail; + return ss.str(); + } + + void test_run_end(const TestRunStats& p) override { + ConsoleReporter::test_run_end(p); + + const bool anythingFailed = p.numTestCasesFailed > 0 || p.numAssertsFailed > 0; + + // ----------------------------------------------------- + // > Gather information in rank 0 + int n_rank, rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &n_rank); + + int g_numAsserts = 0; + int g_numAssertsFailed = 0; + int g_numTestCasesFailed = 0; + + MPI_Reduce(&p.numAsserts , &g_numAsserts , 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); + MPI_Reduce(&p.numAssertsFailed , &g_numAssertsFailed , 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); + MPI_Reduce(&p.numTestCasesFailed, &g_numTestCasesFailed, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); + + std::vector numAssertsFailedByRank; + if(rank == 0){ + numAssertsFailedByRank.resize(static_cast(n_rank)); + } + + MPI_Gather(&p.numAssertsFailed, 1, MPI_INT, numAssertsFailedByRank.data(), 1, MPI_INT, 0, MPI_COMM_WORLD); + + if(rank == 0) { + separator_to_stream(); + s << Color::Cyan << "[doctest] " << Color::None << "assertions on all processes: " << std::setw(6) + << g_numAsserts << " | " + << ((g_numAsserts == 0 || anythingFailed) ? 
Color::None : Color::Green) + << std::setw(6) << (g_numAsserts - g_numAssertsFailed) << " passed" << Color::None + << " | " << (g_numAssertsFailed > 0 ? Color::Red : Color::None) << std::setw(6) + << g_numAssertsFailed << " failed" << Color::None << " |\n"; + if (nb_test_cases_skipped_insufficient_procs>0) { + s << Color::Cyan << "[doctest] " << Color::Yellow << "WARNING: Skipped "; + if (nb_test_cases_skipped_insufficient_procs>1) { + s << nb_test_cases_skipped_insufficient_procs << " tests requiring more than "; + } else { + s << nb_test_cases_skipped_insufficient_procs << " test requiring more than "; + } + if (mpi_comm_world_size()>1) { + s << mpi_comm_world_size() << " MPI processes to run\n"; + } else { + s << mpi_comm_world_size() << " MPI process to run\n"; + } + } + + separator_to_stream(); + if(g_numAssertsFailed > 0){ + + s << Color::Cyan << "[doctest] " << Color::None << "fail on rank:" << std::setw(6) << "\n"; + for(std::size_t i = 0; i < numAssertsFailedByRank.size(); ++i){ + if( numAssertsFailedByRank[i] > 0 ){ + s << std::setw(16) << " -> On rank [" << i << "] with " << numAssertsFailedByRank[i] << " test failed" << std::endl; + } + } + } + s << Color::Cyan << "[doctest] " << Color::None + << "Status: " << (g_numTestCasesFailed > 0 ? Color::Red : Color::Green) + << ((g_numTestCasesFailed > 0) ? "FAILURE!" 
: "SUCCESS!") << Color::None << std::endl; + } + } + + void test_case_end(const CurrentTestCaseStats& st) override { + if (is_mpi_test_case()) { + // function called by every rank at the end of a test + // if failed assertions happened, they have been sent to rank 0 + // here rank zero gathers them and prints them all + + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + + std::vector requests; + requests.reserve(m_failure_str_queue.size()); // avoid realloc & copy of MPI_Request + for (const std::pair &failure : m_failure_str_queue) + { + const std::string & failure_str = failure.first; + const int failure_line = failure.second; + + int failure_msg_size = static_cast(failure_str.size()); + + requests.push_back(MPI_REQUEST_NULL); + MPI_Isend(failure_str.c_str(), failure_msg_size, MPI_BYTE, + 0, failure_line, MPI_COMM_WORLD, &requests.back()); // Tag = file line + } + + + // Compute the number of assert with fail among all procs + const int nb_fail_asserts = static_cast(m_failure_str_queue.size()); + int nb_fail_asserts_glob = 0; + MPI_Reduce(&nb_fail_asserts, &nb_fail_asserts_glob, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); + + if(rank == 0) { + MPI_Status status; + MPI_Status status_recv; + + using id_string = std::pair; + std::vector msgs(static_cast(nb_fail_asserts_glob)); + + for (std::size_t i=0; i(nb_fail_asserts_glob); ++i) { + MPI_Probe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); + + int count; + MPI_Get_count(&status, MPI_BYTE, &count); + + std::string recv_msg(static_cast(count),'\0'); + void* recv_msg_data = const_cast(recv_msg.data()); // const_cast needed. Non-const .data() exists in C++11 though... 
+ MPI_Recv(recv_msg_data, count, MPI_BYTE, status.MPI_SOURCE, + status.MPI_TAG, MPI_COMM_WORLD, &status_recv); + + msgs[i] = {status.MPI_SOURCE,recv_msg}; + } + + std::sort(begin(msgs),end(msgs),[](const id_string& x, const id_string& y){ return x.first < y.first; }); + + // print + if (nb_fail_asserts_glob>0) { + separator_to_stream(); + file_line_to_stream(tc->m_file.c_str(), static_cast(tc->m_line), "\n"); + if(tc->m_test_suite && tc->m_test_suite[0] != '\0') + s << Color::Yellow << "TEST SUITE: " << Color::None << tc->m_test_suite << "\n"; + if(strncmp(tc->m_name, " Scenario:", 11) != 0) + s << Color::Yellow << "TEST CASE: "; + s << Color::None << tc->m_name << "\n\n"; + for(const auto& msg : msgs) { + s << msg.second; + } + s << "\n"; + } + } + + MPI_Waitall(static_cast(requests.size()), requests.data(), MPI_STATUSES_IGNORE); + m_failure_str_queue.clear(); + } + + ConsoleReporter::test_case_end(st); + } + + bool is_mpi_test_case() const { + return tc->m_description != nullptr + && std::string(tc->m_description) == std::string("MPI_TEST_CASE"); + } + + void log_assert(const AssertData& rb) override { + if (!is_mpi_test_case()) { + ConsoleReporter::log_assert(rb); + } else { + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + + + if(!rb.m_failed && !opt.success) + return; + + std::lock_guard lock(mutex); + + std::stringstream failure_msg; + failure_msg << Color::Red << "On rank [" << rank << "] : " << Color::None; + failure_msg << file_line_to_string(rb.m_file, rb.m_line, " "); + + if((rb.m_at & (assertType::is_throws_as | assertType::is_throws_with)) ==0){ + failure_msg << Color::Cyan + << assertString(rb.m_at) + << "( " << rb.m_expr << " ) " + << Color::None + + << (!rb.m_failed ? 
"is correct!\n" : "is NOT correct!\n") + << " values: " + << assertString(rb.m_at) + << "( " << rb.m_decomp.c_str() << " )\n"; + } + + m_failure_str_queue.push_back({failure_msg.str(), rb.m_line}); + } + } +}; // MpiConsoleReporter + +// "1" is the priority - used for ordering when multiple reporters/listeners are used +REGISTER_REPORTER("MpiConsoleReporter", 1, MpiConsoleReporter); +REGISTER_REPORTER("MpiFileReporter", 1, MpiFileReporter); + +} // anonymous +} // doctest + +#endif // DOCTEST_REPORTER_H diff --git a/lib/doctest/extensions/mpi_sub_comm.h b/lib/doctest/extensions/mpi_sub_comm.h new file mode 100644 index 000000000..c030d74ac --- /dev/null +++ b/lib/doctest/extensions/mpi_sub_comm.h @@ -0,0 +1,84 @@ +#ifndef DOCTEST_MPI_SUB_COMM_H +#define DOCTEST_MPI_SUB_COMM_H + +#include "mpi.h" +#include "doctest/doctest.h" +#include +#include + +namespace doctest { + +inline +int mpi_world_nb_procs() { + int n; + MPI_Comm_size(MPI_COMM_WORLD, &n); + return n; +} + +struct mpi_sub_comm { + int nb_procs; + int rank; + MPI_Comm comm; + + mpi_sub_comm( mpi_sub_comm const& ) = delete; + mpi_sub_comm& operator=( mpi_sub_comm const& ) = delete; + + mpi_sub_comm(int nb_prcs) noexcept + : nb_procs(nb_prcs) + , rank(-1) + , comm(MPI_COMM_NULL) + { + int comm_world_rank; + MPI_Comm_rank(MPI_COMM_WORLD, &comm_world_rank); + if (nb_procs>mpi_world_nb_procs()) { + if (comm_world_rank==0) { + MESSAGE( + "Unable to run test: need ", std::to_string(nb_procs), " procs", + " but program launched with only ", std::to_string(doctest::mpi_world_nb_procs()), "." 
+ ); + CHECK(nb_procs<=mpi_world_nb_procs()); + } + } else { + int color = MPI_UNDEFINED; + if(comm_world_rank < nb_procs){ + color = 0; + } + MPI_Comm_split(MPI_COMM_WORLD, color, comm_world_rank, &comm); + + if(comm != MPI_COMM_NULL){ + MPI_Comm_rank(comm, &rank); + assert(rank==comm_world_rank); + } + } + } + + void destroy_comm() { + if(comm != MPI_COMM_NULL){ + MPI_Comm_free(&comm); + } + } + + mpi_sub_comm(mpi_sub_comm&& x) + : nb_procs(x.nb_procs) + , rank(x.rank) + , comm(x.comm) + { + x.comm = MPI_COMM_NULL; + } + mpi_sub_comm& operator=(mpi_sub_comm&& x) { + destroy_comm(); + nb_procs = x.nb_procs; + rank = x.rank; + comm = x.comm; + x.comm = MPI_COMM_NULL; + return *this; + } + + ~mpi_sub_comm() { + destroy_comm(); + } +}; + +} // doctest + +#endif // DOCTEST_SUB_COMM_H