diff --git a/poisson/CMakeLists.txt b/poisson/CMakeLists.txt index 0985e24..0595383 100644 --- a/poisson/CMakeLists.txt +++ b/poisson/CMakeLists.txt @@ -37,6 +37,7 @@ configure_file(Test.L-shape.3D.coo-sort.arc ${CMAKE_CURRENT_BINARY_DIR} COPYONLY configure_file(Test.sphere.3D.arc ${CMAKE_CURRENT_BINARY_DIR} COPYONLY) configure_file(Test.sphere.3D.csr.arc ${CMAKE_CURRENT_BINARY_DIR} COPYONLY) configure_file(Test.sphere.3D.csr.no-edge.arc ${CMAKE_CURRENT_BINARY_DIR} COPYONLY) +configure_file(Test.sphere.3D.csr-gpu.arc ${CMAKE_CURRENT_BINARY_DIR} COPYONLY) configure_file(Test.direct-solver.arc ${CMAKE_CURRENT_BINARY_DIR} COPYONLY) configure_file(Test.neumann.arc ${CMAKE_CURRENT_BINARY_DIR} COPYONLY) configure_file(Test.porous.arc ${CMAKE_CURRENT_BINARY_DIR} COPYONLY) @@ -105,6 +106,7 @@ endif() arcanefem_add_gpu_test(NAME [poisson]gpu COMMAND ./Poisson ARGS Test.petsc.arc) +arcanefem_add_gpu_test(NAME [poisson]hypre_direct_3D_csr-gpu COMMAND ./Poisson ARGS Test.sphere.3D.csr-gpu.arc) arcanefem_add_gpu_test(NAME [poisson]L-shape_2D_petsc_coo-gpu COMMAND ./Poisson ARGS Test.L-shape.2D.coo-gpu.arc) arcanefem_add_gpu_test(NAME [poisson]L-shape_3D_petsc_coo-gpu COMMAND ./Poisson ARGS Test.L-shape.3D.coo-gpu.arc) if(FEMUTILS_HAS_SOLVER_BACKEND_HYPRE) diff --git a/poisson/CsrGpuBiliAssembly.cc b/poisson/CsrGpuBiliAssembly.cc index 48dc341..8d3ec0e 100644 --- a/poisson/CsrGpuBiliAssembly.cc +++ b/poisson/CsrGpuBiliAssembly.cc @@ -14,33 +14,20 @@ #include "FemModule.h" -/** - * @brief Initialization of the csr matrix. It only works for p=1 since there is - * one node per Edge. Currently, there is no difference between buildMatrixCsr and this method. - * - * - */ +/*---------------------------------------------------------------------------*/ +/*---------------------------------------------------------------------------*/ + void FemModule:: _buildMatrixCsrGPU() { - //Initialization of the csr matrix; - //This formula only works in p=1 - - /* - //Create a connection between nodes through the faces - //Useless here because we only need this information once - IItemFamily* node_family = mesh()->nodeFamily(); - NodeGroup nodes = node_family->allItems(); - auto idx_cn = mesh()->indexedConnectivityMng()->findOrCreateConnectivity(node_family, node_family, "NodeToNeighbourFaceNodes"); - auto* cn = idx_cn->connectivity(); - ENUMERATE_NODE (node, allNodes()) { - } - */ + auto node_dof(m_dofs_on_nodes.nodeDoFConnectivityView()); + + Int32 nnz = (options()->meshType == "TRIA3" ? nbFace() : nbEdge() * 2) + nbNode(); - Int32 nnz = nbFace() * 2 + nbNode(); m_csr_matrix.initialize(m_dof_family, nnz, nbNode()); - auto node_dof(m_dofs_on_nodes.nodeDoFConnectivityView()); + //We iterate through the node, and we do not sort anymore : we assume the nodes ID are sorted, and we will iterate throught the column to avoid making < and > comparison + if (options()->meshType == "TRIA3") { ENUMERATE_NODE (inode, allNodes()) { Node node = *inode; Int32 node_dof_id = node_dof.dofId(node, 0); @@ -55,9 +42,36 @@ _buildMatrixCsrGPU() m_csr_matrix.setCoordinates(diagonal_entry, node_dof.dofId(face.nodeId(0), 0)); } } + } + else if (options()->meshType == "TETRA4") { + ENUMERATE_NODE (inode, allNodes()) { + Node node = *inode; + Int32 node_dof_id = node_dof.dofId(node, 0); + ItemLocalIdT diagonal_entry(node_dof_id); + + m_csr_matrix.setCoordinates(diagonal_entry, diagonal_entry); + + for (Edge edge : node.edges()) { + if (edge.nodeId(0) == node.localId()) + m_csr_matrix.setCoordinates(diagonal_entry, node_dof.dofId(edge.nodeId(1), 0)); + else + m_csr_matrix.setCoordinates(diagonal_entry, node_dof.dofId(edge.nodeId(0), 0)); + } + } + } } /*---------------------------------------------------------------------------*/ +/** + * @brief Assembles the bilinear operator matrix for the FEM linear system with + * the CSR sparse matrix format for TRIA3 elements. + * + * The method performs the following steps: + * 1. Builds the CSR matrix using _buildMatrixCsrGPU. + * 2. For each cell, computes element matrix using _computeElementMatrixTRIA3GPU. + * 3. Assembles global matrix by adding contributions from each cell's element + * matrix to the corresponding entries in the global matrix. + */ /*---------------------------------------------------------------------------*/ void FemModule:: @@ -136,4 +150,79 @@ _assembleCsrGPUBilinearOperatorTRIA3() } /*---------------------------------------------------------------------------*/ +/** + * @brief Assembles the bilinear operator matrix for the FEM linear system with + * the CSR sparse matrix format for TETRA4 elements. + * + * The method performs the following steps: + * 1. Builds the CSR matrix using _buildMatrixCsrGPU. + * 2. For each cell, computes element matrix using _computeElementMatrixTETRA4GPU. + * 3. Assembles global matrix by adding contributions from each cell's element + * matrix to the corresponding entries in the global matrix. + */ +/*---------------------------------------------------------------------------*/ + +void FemModule:: +_assembleCsrGPUBilinearOperatorTETRA4() +{ + Timer::Action timer_gpu_bili(m_time_stats, "AssembleCsrGpuBilinearOperatorTetra4"); + + { + Timer::Action timer_gpu_build(m_time_stats, "CsrGpuBuildMatrix"); + _buildMatrixCsrGPU(); + } + + RunQueue* queue = acceleratorMng()->defaultQueue(); + auto command = makeCommand(queue); + + auto node_dof(m_dofs_on_nodes.nodeDoFConnectivityView()); + auto in_row_csr = ax::viewIn(command, m_csr_matrix.m_matrix_row); + Int32 row_csr_size = m_csr_matrix.m_matrix_row.dim1Size(); + auto in_col_csr = ax::viewIn(command, m_csr_matrix.m_matrix_column); + Int32 col_csr_size = m_csr_matrix.m_matrix_column.dim1Size(); + auto in_out_val_csr = ax::viewInOut(command, m_csr_matrix.m_matrix_value); + UnstructuredMeshConnectivityView m_connectivity_view; + auto in_node_coord = ax::viewIn(command, m_node_coord); + m_connectivity_view.setMesh(this->mesh()); + auto cnc = m_connectivity_view.cellNode(); + Arcane::ItemGenericInfoListView nodes_infos(this->mesh()->nodeFamily()); + + Timer::Action timer_add_compute(m_time_stats, "CsrGpuAddComputeLoop"); + + command << RUNCOMMAND_ENUMERATE(Cell, icell, allCells()) + { + + Real K_e[16] = { 0 }; + + _computeElementMatrixTETRA4GPU(icell, cnc, in_node_coord, K_e); + + Int32 n1_index = 0; + for (NodeLocalId node1 : cnc.nodes(icell)) { + Int32 n2_index = 0; + for (NodeLocalId node2 : cnc.nodes(icell)) { + double v = K_e[n1_index * 4 + n2_index]; + + if (nodes_infos.isOwn(node1)) { + + Int32 row = node_dof.dofId(node1, 0).localId(); + Int32 col = node_dof.dofId(node2, 0).localId(); + Int32 begin = in_row_csr[row]; + Int32 end = (row == row_csr_size - 1) ? col_csr_size : in_row_csr[row + 1]; + + while (begin < end) { + if (in_col_csr[begin] == col) { + ax::doAtomic(in_out_val_csr(begin), v); + break; + } + begin++; + } + } + ++n2_index; + } + ++n1_index; + } + }; +} + /*---------------------------------------------------------------------------*/ +/*---------------------------------------------------------------------------*/ \ No newline at end of file diff --git a/poisson/FemModule.cc b/poisson/FemModule.cc index b910dbb..fd10682 100644 --- a/poisson/FemModule.cc +++ b/poisson/FemModule.cc @@ -489,12 +489,19 @@ _doStationarySolve() if (m_use_csr_gpu) { m_linear_system.clearValues(); - _assembleCsrGPUBilinearOperatorTRIA3(); + if (options()->meshType == "TRIA3") + _assembleCsrGPUBilinearOperatorTRIA3(); + if (options()->meshType == "TETRA4") + _assembleCsrGPUBilinearOperatorTETRA4(); if (m_cache_warming != 1) { m_time_stats->resetStats("AssembleCsrGpuBilinearOperatorTria3"); + m_time_stats->resetStats("AssembleCsrGpuBilinearOperatorTetra4"); for (cache_index = 1; cache_index < m_cache_warming; cache_index++) { m_linear_system.clearValues(); - _assembleCsrGPUBilinearOperatorTRIA3(); + if (options()->meshType == "TRIA3") + _assembleCsrGPUBilinearOperatorTRIA3(); + if (options()->meshType == "TETRA4") + _assembleCsrGPUBilinearOperatorTETRA4(); } } m_csr_matrix.translateToLinearSystem(m_linear_system); diff --git a/poisson/FemModule.h b/poisson/FemModule.h index a1335d1..7bfbb38 100644 --- a/poisson/FemModule.h +++ b/poisson/FemModule.h @@ -224,6 +224,7 @@ class FemModule void _applyDirichletBoundaryConditionsGpu(); void _assembleCsrGpuLinearOperator(); + static ARCCORE_HOST_DEVICE Int32 _getValIndexCsrGpu(Int32 begin, Int32 end, DoFLocalId col, ax::NumArrayView, MDDim1, DefaultLayout> csr_col); @@ -262,11 +263,13 @@ class FemModule _computeElementMatrixTETRA4GPU(CellLocalId icell, IndexedCellNodeConnectivityView cnc, ax::VariableNodeReal3InView in_node_coord, Real K_e[16]); + private: public: void _buildMatrixCsrGPU(); void _assembleCsrGPUBilinearOperatorTRIA3(); + void _assembleCsrGPUBilinearOperatorTETRA4(); private: public: @@ -437,6 +440,7 @@ _computeElementMatrixTETRA4GPU(CellLocalId icell, IndexedCellNodeConnectivityVie } } } + /*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/ diff --git a/poisson/Test.sphere.3D.csr-gpu.arc b/poisson/Test.sphere.3D.csr-gpu.arc new file mode 100644 index 0000000..5c45376 --- /dev/null +++ b/poisson/Test.sphere.3D.csr-gpu.arc @@ -0,0 +1,43 @@ + + + + Sphere 3D with CSR sparse matrix format Gpu compatible. The result of this test is compared with test_sphere_3D_results.txt + PoissonLoop + + + + 1 + + + U + + + + + + sphere_cut.msh + + + + + test_sphere_3D_results.txt + -0.01 + TETRA4 + Penalty + 1.e31 + + sphere + 0.0 + + + Cut + 10.0 + + + 0. + 1e-5 + 0.55 + + true + +