From bbabecf34f1c136ee1f83d226524a657696472e6 Mon Sep 17 00:00:00 2001 From: Charles-Antoine Leger Date: Mon, 7 Oct 2024 11:27:16 +0200 Subject: [PATCH] feat[poisson]: Support 3D mesh with CSR GPU + tests --- poisson/CMakeLists.txt | 2 + poisson/CsrGpuBiliAssembly.cc | 131 ++++++++++++++++++++++++----- poisson/FemModule.cc | 11 ++- poisson/FemModule.h | 81 +++++++++++++++++- poisson/Test.sphere.3D.csr-gpu.arc | 43 ++++++++++ 5 files changed, 244 insertions(+), 24 deletions(-) create mode 100644 poisson/Test.sphere.3D.csr-gpu.arc diff --git a/poisson/CMakeLists.txt b/poisson/CMakeLists.txt index 01d574f..53c3956 100644 --- a/poisson/CMakeLists.txt +++ b/poisson/CMakeLists.txt @@ -30,6 +30,7 @@ configure_file(Test.L-shape.3D.arc ${CMAKE_CURRENT_BINARY_DIR} COPYONLY) configure_file(Test.sphere.3D.arc ${CMAKE_CURRENT_BINARY_DIR} COPYONLY) configure_file(Test.sphere.3D.csr.arc ${CMAKE_CURRENT_BINARY_DIR} COPYONLY) configure_file(Test.sphere.3D.csr.no-edge.arc ${CMAKE_CURRENT_BINARY_DIR} COPYONLY) +configure_file(Test.sphere.3D.csr-gpu.arc ${CMAKE_CURRENT_BINARY_DIR} COPYONLY) configure_file(Test.direct-solver.arc ${CMAKE_CURRENT_BINARY_DIR} COPYONLY) configure_file(Test.neumann.arc ${CMAKE_CURRENT_BINARY_DIR} COPYONLY) configure_file(Test.porous.arc ${CMAKE_CURRENT_BINARY_DIR} COPYONLY) @@ -94,6 +95,7 @@ endif() arcanefem_add_gpu_test(NAME [poisson]gpu COMMAND ./Poisson ARGS Test.petsc.arc) +arcanefem_add_gpu_test(NAME [poisson]hypre_direct_3D_csr-gpu COMMAND ./Poisson ARGS Test.sphere.3D.csr-gpu.arc) if(FEMUTILS_HAS_SOLVER_BACKEND_HYPRE) arcanefem_add_gpu_test(NAME [poisson]hypre_direct_gpu COMMAND ./Poisson ARGS Test.hypre_direct.arc) arcanefem_add_gpu_test(NAME [poisson]hypre_direct_3D_gpu COMMAND ./Poisson ARGS Test.sphere.3D.arc) diff --git a/poisson/CsrGpuBiliAssembly.cc b/poisson/CsrGpuBiliAssembly.cc index 48dc341..8d3ec0e 100644 --- a/poisson/CsrGpuBiliAssembly.cc +++ b/poisson/CsrGpuBiliAssembly.cc @@ -14,33 +14,20 @@ #include "FemModule.h" -/** - * @brief Initialization of the csr matrix. It only works for p=1 since there is - * one node per Edge. Currently, there is no difference between buildMatrixCsr and this method. - * - * - */ +/*---------------------------------------------------------------------------*/ +/*---------------------------------------------------------------------------*/ + void FemModule:: _buildMatrixCsrGPU() { - //Initialization of the csr matrix; - //This formula only works in p=1 - - /* - //Create a connection between nodes through the faces - //Useless here because we only need this information once - IItemFamily* node_family = mesh()->nodeFamily(); - NodeGroup nodes = node_family->allItems(); - auto idx_cn = mesh()->indexedConnectivityMng()->findOrCreateConnectivity(node_family, node_family, "NodeToNeighbourFaceNodes"); - auto* cn = idx_cn->connectivity(); - ENUMERATE_NODE (node, allNodes()) { - } - */ + auto node_dof(m_dofs_on_nodes.nodeDoFConnectivityView()); + + Int32 nnz = (options()->meshType == "TRIA3" ? nbFace() : nbEdge() * 2) + nbNode(); - Int32 nnz = nbFace() * 2 + nbNode(); m_csr_matrix.initialize(m_dof_family, nnz, nbNode()); - auto node_dof(m_dofs_on_nodes.nodeDoFConnectivityView()); + //We iterate through the node, and we do not sort anymore : we assume the nodes ID are sorted, and we will iterate throught the column to avoid making < and > comparison + if (options()->meshType == "TRIA3") { ENUMERATE_NODE (inode, allNodes()) { Node node = *inode; Int32 node_dof_id = node_dof.dofId(node, 0); @@ -55,9 +42,36 @@ _buildMatrixCsrGPU() m_csr_matrix.setCoordinates(diagonal_entry, node_dof.dofId(face.nodeId(0), 0)); } } + } + else if (options()->meshType == "TETRA4") { + ENUMERATE_NODE (inode, allNodes()) { + Node node = *inode; + Int32 node_dof_id = node_dof.dofId(node, 0); + ItemLocalIdT diagonal_entry(node_dof_id); + + m_csr_matrix.setCoordinates(diagonal_entry, diagonal_entry); + + for (Edge edge : node.edges()) { + if (edge.nodeId(0) == node.localId()) + m_csr_matrix.setCoordinates(diagonal_entry, node_dof.dofId(edge.nodeId(1), 0)); + else + m_csr_matrix.setCoordinates(diagonal_entry, node_dof.dofId(edge.nodeId(0), 0)); + } + } + } } /*---------------------------------------------------------------------------*/ +/** + * @brief Assembles the bilinear operator matrix for the FEM linear system with + * the CSR sparse matrix format for TRIA3 elements. + * + * The method performs the following steps: + * 1. Builds the CSR matrix using _buildMatrixCsrGPU. + * 2. For each cell, computes element matrix using _computeElementMatrixTRIA3GPU. + * 3. Assembles global matrix by adding contributions from each cell's element + * matrix to the corresponding entries in the global matrix. + */ /*---------------------------------------------------------------------------*/ void FemModule:: @@ -136,4 +150,79 @@ _assembleCsrGPUBilinearOperatorTRIA3() } /*---------------------------------------------------------------------------*/ +/** + * @brief Assembles the bilinear operator matrix for the FEM linear system with + * the CSR sparse matrix format for TETRA4 elements. + * + * The method performs the following steps: + * 1. Builds the CSR matrix using _buildMatrixCsrGPU. + * 2. For each cell, computes element matrix using _computeElementMatrixTETRA4GPU. + * 3. Assembles global matrix by adding contributions from each cell's element + * matrix to the corresponding entries in the global matrix. + */ +/*---------------------------------------------------------------------------*/ + +void FemModule:: +_assembleCsrGPUBilinearOperatorTETRA4() +{ + Timer::Action timer_gpu_bili(m_time_stats, "AssembleCsrGpuBilinearOperatorTetra4"); + + { + Timer::Action timer_gpu_build(m_time_stats, "CsrGpuBuildMatrix"); + _buildMatrixCsrGPU(); + } + + RunQueue* queue = acceleratorMng()->defaultQueue(); + auto command = makeCommand(queue); + + auto node_dof(m_dofs_on_nodes.nodeDoFConnectivityView()); + auto in_row_csr = ax::viewIn(command, m_csr_matrix.m_matrix_row); + Int32 row_csr_size = m_csr_matrix.m_matrix_row.dim1Size(); + auto in_col_csr = ax::viewIn(command, m_csr_matrix.m_matrix_column); + Int32 col_csr_size = m_csr_matrix.m_matrix_column.dim1Size(); + auto in_out_val_csr = ax::viewInOut(command, m_csr_matrix.m_matrix_value); + UnstructuredMeshConnectivityView m_connectivity_view; + auto in_node_coord = ax::viewIn(command, m_node_coord); + m_connectivity_view.setMesh(this->mesh()); + auto cnc = m_connectivity_view.cellNode(); + Arcane::ItemGenericInfoListView nodes_infos(this->mesh()->nodeFamily()); + + Timer::Action timer_add_compute(m_time_stats, "CsrGpuAddComputeLoop"); + + command << RUNCOMMAND_ENUMERATE(Cell, icell, allCells()) + { + + Real K_e[16] = { 0 }; + + _computeElementMatrixTETRA4GPU(icell, cnc, in_node_coord, K_e); + + Int32 n1_index = 0; + for (NodeLocalId node1 : cnc.nodes(icell)) { + Int32 n2_index = 0; + for (NodeLocalId node2 : cnc.nodes(icell)) { + double v = K_e[n1_index * 4 + n2_index]; + + if (nodes_infos.isOwn(node1)) { + + Int32 row = node_dof.dofId(node1, 0).localId(); + Int32 col = node_dof.dofId(node2, 0).localId(); + Int32 begin = in_row_csr[row]; + Int32 end = (row == row_csr_size - 1) ? col_csr_size : in_row_csr[row + 1]; + + while (begin < end) { + if (in_col_csr[begin] == col) { + ax::doAtomic(in_out_val_csr(begin), v); + break; + } + begin++; + } + } + ++n2_index; + } + ++n1_index; + } + }; +} + /*---------------------------------------------------------------------------*/ +/*---------------------------------------------------------------------------*/ \ No newline at end of file diff --git a/poisson/FemModule.cc b/poisson/FemModule.cc index 99bce2e..5dd8b53 100644 --- a/poisson/FemModule.cc +++ b/poisson/FemModule.cc @@ -456,12 +456,19 @@ _doStationarySolve() #ifdef ARCANE_HAS_ACCELERATOR if (m_use_csr_gpu) { m_linear_system.clearValues(); - _assembleCsrGPUBilinearOperatorTRIA3(); + if (options()->meshType == "TRIA3") + _assembleCsrGPUBilinearOperatorTRIA3(); + if (options()->meshType == "TETRA4") + _assembleCsrGPUBilinearOperatorTETRA4(); if (m_cache_warming != 1) { m_time_stats->resetStats("AssembleCsrGpuBilinearOperatorTria3"); + m_time_stats->resetStats("AssembleCsrGpuBilinearOperatorTetra4"); for (cache_index = 1; cache_index < m_cache_warming; cache_index++) { m_linear_system.clearValues(); - _assembleCsrGPUBilinearOperatorTRIA3(); + if (options()->meshType == "TRIA3") + _assembleCsrGPUBilinearOperatorTRIA3(); + if (options()->meshType == "TETRA4") + _assembleCsrGPUBilinearOperatorTETRA4(); } } m_csr_matrix.translateToLinearSystem(m_linear_system); diff --git a/poisson/FemModule.h b/poisson/FemModule.h index b0f2198..2854081 100644 --- a/poisson/FemModule.h +++ b/poisson/FemModule.h @@ -223,6 +223,7 @@ class FemModule void _applyDirichletBoundaryConditionsGpu(); void _assembleCsrGpuLinearOperator(); + static ARCCORE_HOST_DEVICE Int32 _getValIndexCsrGpu(Int32 begin, Int32 end, DoFLocalId col, ax::NumArrayView, MDDim1, DefaultLayout> csr_col); @@ -255,6 +256,10 @@ class FemModule _computeElementMatrixTRIA3GPU(CellLocalId icell, IndexedCellNodeConnectivityView cnc, ax::VariableNodeReal3InView in_node_coord, Real K_e[9]); + static ARCCORE_HOST_DEVICE inline void + _computeElementMatrixTETRA4GPU(CellLocalId icell, IndexedCellNodeConnectivityView cnc, + ax::VariableNodeReal3InView in_node_coord, Real K_e[12]); + private: @@ -262,6 +267,7 @@ class FemModule void _buildMatrixCsrGPU(); void _assembleCsrGPUBilinearOperatorTRIA3(); + void _assembleCsrGPUBilinearOperatorTETRA4(); private: @@ -364,4 +370,77 @@ _computeElementMatrixTRIA3GPU(CellLocalId icell, IndexedCellNodeConnectivityView //return int_cdPi_dPj; } -#endif +/*---------------------------------------------------------------------------*/ +/*---------------------------------------------------------------------------*/ + +ARCCORE_HOST_DEVICE inline void FemModule:: +_computeElementMatrixTETRA4GPU(CellLocalId icell, IndexedCellNodeConnectivityView cnc, + ax::VariableNodeReal3InView in_node_coord, Real K_e[16]) +{ + // Get coordinates of the triangle element TETRA4 + //------------------------------------------------ + // 3 o + // /|\ + // / | \ + // / | \ + // / o 2 \ + // / . . \ + // o-----------o + // 0 1 + //------------------------------------------------ + Real3 m0 = in_node_coord[cnc.nodeId(icell, 0)]; + Real3 m1 = in_node_coord[cnc.nodeId(icell, 1)]; + Real3 m2 = in_node_coord[cnc.nodeId(icell, 2)]; + Real3 m3 = in_node_coord[cnc.nodeId(icell, 3)]; + + Real3 v0 = m1 - m0; + Real3 v1 = m2 - m0; + Real3 v2 = m3 - m0; + + Real volume = std::abs(Arcane::math::dot(v0, Arcane::math::cross(v1, v2))) / 6.0; + + // Compute gradients of shape functions + Real3 dPhi0 = Arcane::math::cross(m2 - m1, m1 - m3); + Real3 dPhi1 = Arcane::math::cross(m3 - m0, m0 - m2); + Real3 dPhi2 = Arcane::math::cross(m1 - m0, m0 - m3); + Real3 dPhi3 = Arcane::math::cross(m0 - m1, m1 - m2); + + Real mul = 1.0 / (6.0 * volume); + + // Construct the B-matrix + Real b_matrix[3][4]; + b_matrix[0][0] = dPhi0.x; + b_matrix[1][0] = dPhi0.y; + b_matrix[2][0] = dPhi0.z; + + b_matrix[0][1] = dPhi1.x; + b_matrix[1][1] = dPhi1.y; + b_matrix[2][1] = dPhi1.z; + + b_matrix[0][2] = dPhi2.x; + b_matrix[1][2] = dPhi2.y; + b_matrix[2][2] = dPhi2.z; + + b_matrix[0][3] = dPhi3.x; + b_matrix[1][3] = dPhi3.y; + b_matrix[2][3] = dPhi3.z; + + for (Int32 i = 0; i < 3; ++i) + for (Int32 j = 0; j < 4; ++j) + b_matrix[i][j] *= mul; + + // Compute the element matrix + for (Int32 i = 0; i < 4; ++i) { + for (Int32 j = i; j < 4; ++j) { + for (Int32 k = 0; k < 3; ++k) + K_e[i * 4 + j] += b_matrix[k][i] * b_matrix[k][j]; + K_e[i * 4 + j] *= volume; + K_e[j * 4 + i] = K_e[i * 4 + j]; + } + } +} + +/*---------------------------------------------------------------------------*/ +/*---------------------------------------------------------------------------*/ + +#endif \ No newline at end of file diff --git a/poisson/Test.sphere.3D.csr-gpu.arc b/poisson/Test.sphere.3D.csr-gpu.arc new file mode 100644 index 0000000..5c45376 --- /dev/null +++ b/poisson/Test.sphere.3D.csr-gpu.arc @@ -0,0 +1,43 @@ + + + + Sphere 3D with CSR sparse matrix format Gpu compatible. The result of this test is compared with test_sphere_3D_results.txt + PoissonLoop + + + + 1 + + + U + + + + + + sphere_cut.msh + + + + + test_sphere_3D_results.txt + -0.01 + TETRA4 + Penalty + 1.e31 + + sphere + 0.0 + + + Cut + 10.0 + + + 0. + 1e-5 + 0.55 + + true + +