diff --git a/poisson/CMakeLists.txt b/poisson/CMakeLists.txt index 0595383..6a0b1dc 100644 --- a/poisson/CMakeLists.txt +++ b/poisson/CMakeLists.txt @@ -27,6 +27,8 @@ arcane_add_arcane_libraries_to_target(Poisson) target_include_directories(Poisson PUBLIC . ${CMAKE_CURRENT_BINARY_DIR}) configure_file(Poisson.config ${CMAKE_CURRENT_BINARY_DIR} COPYONLY) configure_file(Test.L-shape.2D.arc ${CMAKE_CURRENT_BINARY_DIR} COPYONLY) +configure_file(Test.L-shape.2D.nwcsr.arc ${CMAKE_CURRENT_BINARY_DIR} COPYONLY) +configure_file(Test.L-shape.3D.nwcsr.arc ${CMAKE_CURRENT_BINARY_DIR} COPYONLY) configure_file(Test.L-shape.2D.coo.arc ${CMAKE_CURRENT_BINARY_DIR} COPYONLY) configure_file(Test.L-shape.2D.coo-sort.arc ${CMAKE_CURRENT_BINARY_DIR} COPYONLY) configure_file(Test.L-shape.3D.arc ${CMAKE_CURRENT_BINARY_DIR} COPYONLY) @@ -106,6 +108,8 @@ endif() arcanefem_add_gpu_test(NAME [poisson]gpu COMMAND ./Poisson ARGS Test.petsc.arc) +arcanefem_add_gpu_test(NAME [poisson]L-shape_2D_nwcsr_gpu COMMAND ./Poisson ARGS Test.L-shape.2D.nwcsr.arc) +arcanefem_add_gpu_test(NAME [poisson]L-shape_3D_nwcsr_gpu COMMAND ./Poisson ARGS Test.L-shape.2D.nwcsr.arc) arcanefem_add_gpu_test(NAME [poisson]hypre_direct_3D_csr-gpu COMMAND ./Poisson ARGS Test.sphere.3D.csr-gpu.arc) arcanefem_add_gpu_test(NAME [poisson]L-shape_2D_petsc_coo-gpu COMMAND ./Poisson ARGS Test.L-shape.2D.coo-gpu.arc) arcanefem_add_gpu_test(NAME [poisson]L-shape_3D_petsc_coo-gpu COMMAND ./Poisson ARGS Test.L-shape.3D.coo-gpu.arc) diff --git a/poisson/FemModule.cc b/poisson/FemModule.cc index fd10682..a93ed71 100644 --- a/poisson/FemModule.cc +++ b/poisson/FemModule.cc @@ -510,16 +510,24 @@ _doStationarySolve() if (m_use_nodewise_csr) { m_linear_system.clearValues(); - _assembleNodeWiseCsrBilinearOperatorTria3(); + if (options()->meshType == "TRIA3") + _assembleNodeWiseCsrBilinearOperatorTria3(); + if (options()->meshType == "TETRA4") + _assembleNodeWiseCsrBilinearOperatorTetra4(); if (m_cache_warming != 1) { m_time_stats->resetStats("AssembleNodeWiseCsrBilinearOperatorTria3"); + m_time_stats->resetStats("AssembleNodeWiseCsrBilinearOperatorTetra4"); for (cache_index = 1; cache_index < m_cache_warming; cache_index++) { m_linear_system.clearValues(); - _assembleNodeWiseCsrBilinearOperatorTria3(); + if (options()->meshType == "TRIA3") + _assembleNodeWiseCsrBilinearOperatorTria3(); + if (options()->meshType == "TETRA4") + _assembleNodeWiseCsrBilinearOperatorTetra4(); } } m_csr_matrix.translateToLinearSystem(m_linear_system); } + if (m_use_buildless_csr) { m_linear_system.clearValues(); if (options()->meshType == "TRIA3") diff --git a/poisson/FemModule.h b/poisson/FemModule.h index 7bfbb38..9974043 100644 --- a/poisson/FemModule.h +++ b/poisson/FemModule.h @@ -288,6 +288,7 @@ class FemModule void _buildMatrixNodeWiseCsr(); void _assembleNodeWiseCsrBilinearOperatorTria3(); + void _assembleNodeWiseCsrBilinearOperatorTetra4(); void _buildMatrixBuildLessCsr(); void _buildMatrixGpuBuildLessCsr(); diff --git a/poisson/NodeWiseCsrBiliAssembly.cc b/poisson/NodeWiseCsrBiliAssembly.cc index 321dac1..bb6c7b4 100644 --- a/poisson/NodeWiseCsrBiliAssembly.cc +++ b/poisson/NodeWiseCsrBiliAssembly.cc @@ -5,7 +5,7 @@ // SPDX-License-Identifier: Apache-2.0 //----------------------------------------------------------------------------- /*---------------------------------------------------------------------------*/ -/* BlcsrBiliAssembly.hxx (C) 2022-2024 */ +/* NWCSRiliAssembly.hxx (C) 2022-2024 */ /* */ /* Methods of the bilinear assembly phase using the csr data structure */ /* which avoid to add in the global matrix by iterating through the node. */ @@ -15,14 +15,34 @@ #include "FemModule.h" +/*---------------------------------------------------------------------------*/ +/** + * @brief Builds the node-wise CSR matrix for the finite element method. + * + * This function initializes and fills the CSR (Compressed Sparse Row) matrix + * based on the node-wise connectivity of the mesh. It supports different mesh + * types, specifically "TRIA3" and "TETRA4". + * + * For "TRIA3" mesh type, the function iterates over all nodes and sets the + * coordinates in the CSR matrix based on the node's faces. + * + * For "TETRA4" mesh type, the function iterates over all nodes and sets the + * coordinates in the CSR matrix based on the node's edges. + * + * The function also initializes the memory space required for the CSR matrix + * based on the number of nodes and edges/faces in the mesh. + */ +/*---------------------------------------------------------------------------*/ + void FemModule::_buildMatrixNodeWiseCsr() { - auto node_dof(m_dofs_on_nodes.nodeDoFConnectivityView()); // Compute the number of nnz and initialize the memory space - Int32 nnz = nbFace() * 2 + nbNode(); - m_csr_matrix.initialize(m_dof_family, nnz, nbNode()); + Int64 nbnde = nbNode(); + Int64 nedge = options()->meshType == "TETRA4" ? nbEdge() : nbFace(); + Int32 nnz = nedge * 2 + nbnde; + m_csr_matrix.initialize(m_dof_family, nnz, nbnde); /*removing the neoighbouring currently as it is useless // Creating a connectivity from node to their neighbouring nodes @@ -31,22 +51,41 @@ void FemModule::_buildMatrixNodeWiseCsr() idx_cn = mesh()->indexedConnectivityMng()->findOrCreateConnectivity(node_family, node_family, "NodeToNeighbourFaceNodes"); cn = idx_cn->connectivity(); */ - ENUMERATE_NODE (inode, allNodes()) { - //Since we compute the neighbouring connectivity here, we also fill the csr matrix + if (options()->meshType == "TRIA3") { + ENUMERATE_NODE (inode, allNodes()) { + + //Since we compute the neighbouring connectivity here, we also fill the csr matrix - Node node = *inode; + Node node = *inode; - m_csr_matrix.setCoordinates(node_dof.dofId(node, 0), node_dof.dofId(node, 0)); + m_csr_matrix.setCoordinates(node_dof.dofId(node, 0), node_dof.dofId(node, 0)); - for (Face face : node.faces()) { - if (face.nodeId(0) == node.localId()) { - // cn->addConnectedItem(node, face.node(0)); - m_csr_matrix.setCoordinates(node_dof.dofId(node, 0), node_dof.dofId(face.nodeId(1), 0)); + for (Face face : node.faces()) { + if (face.nodeId(0) == node.localId()) { + // cn->addConnectedItem(node, face.node(0)); + m_csr_matrix.setCoordinates(node_dof.dofId(node, 0), node_dof.dofId(face.nodeId(1), 0)); + } + else { + // cn->addConnectedItem(node, face.node(1)); + m_csr_matrix.setCoordinates(node_dof.dofId(node, 0), node_dof.dofId(face.nodeId(0), 0)); + } } - else { - // cn->addConnectedItem(node, face.node(1)); - m_csr_matrix.setCoordinates(node_dof.dofId(node, 0), node_dof.dofId(face.nodeId(0), 0)); + } + } + else if (options()->meshType == "TETRA4") { + ENUMERATE_NODE (inode, allNodes()) { + Node node = *inode; + Int32 node_dof_id = node_dof.dofId(node, 0); + ItemLocalIdT diagonal_entry(node_dof_id); + + m_csr_matrix.setCoordinates(diagonal_entry, diagonal_entry); + + for (Edge edge : node.edges()) { + if (edge.nodeId(0) == node.localId()) + m_csr_matrix.setCoordinates(diagonal_entry, node_dof.dofId(edge.nodeId(1), 0)); + else + m_csr_matrix.setCoordinates(diagonal_entry, node_dof.dofId(edge.nodeId(0), 0)); } } } @@ -57,9 +96,9 @@ void FemModule::_buildMatrixNodeWiseCsr() void FemModule::_assembleNodeWiseCsrBilinearOperatorTria3() { - Timer::Action timer_blcsr_bili(m_time_stats, "AssembleNodeWiseCsrBilinearOperatorTria3"); + Timer::Action timer_nwcsr_bili(m_time_stats, "AssembleNodeWiseCsrBilinearOperatorTria3"); { - Timer::Action timer_blcsr_build(m_time_stats, "NodeWiseCsrBuildMatrix"); + Timer::Action timer_nwcsr_build(m_time_stats, "NodeWiseCsrBuildMatrix"); // Build the csr matrix _buildMatrixNodeWiseCsr(); } @@ -85,7 +124,7 @@ void FemModule::_assembleNodeWiseCsrBilinearOperatorTria3() Arcane::ItemGenericInfoListView nodes_infos(this->mesh()->nodeFamily()); Arcane::ItemGenericInfoListView cells_infos(this->mesh()->cellFamily()); - Timer::Action timer_blcsr_add_compute(m_time_stats, "NodeWiseCsrAddAndCompute"); + Timer::Action timer_nwcsr_add_compute(m_time_stats, "NodeWiseCsrAddAndCompute"); command << RUNCOMMAND_ENUMERATE(Node, inode, allNodes()) { Int32 inode_index = 0; @@ -158,3 +197,98 @@ void FemModule::_assembleNodeWiseCsrBilinearOperatorTria3() /*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/ +/** + * @brief Assembles the node-wise CSR bilinear operator for TETRA4 mesh type. + * + * This function builds the CSR matrix and computes the bilinear operator + * for a TETRA4 mesh type. It initializes the CSR matrix, sets up the + * necessary views, and performs the computation on the accelerator. + */ +void FemModule::_assembleNodeWiseCsrBilinearOperatorTetra4() +{ + Timer::Action timer_nwcsr_bili(m_time_stats, "AssembleNodeWiseCsrBilinearOperatorTetra4"); + { + Timer::Action timer_nwcsr_build(m_time_stats, "NodeWiseCsrBuildMatrix"); + _buildMatrixNodeWiseCsr(); + } + + RunQueue* queue = acceleratorMng()->defaultQueue(); + auto command = makeCommand(queue); + + auto node_dof(m_dofs_on_nodes.nodeDoFConnectivityView()); + auto in_row_csr = ax::viewIn(command, m_csr_matrix.m_matrix_row); + Int32 row_csr_size = m_csr_matrix.m_matrix_row.dim1Size(); + auto in_col_csr = ax::viewIn(command, m_csr_matrix.m_matrix_column); + Int32 col_csr_size = m_csr_matrix.m_matrix_column.dim1Size(); + auto in_out_val_csr = ax::viewInOut(command, m_csr_matrix.m_matrix_value); + auto in_node_coord = ax::viewIn(command, m_node_coord); + + UnstructuredMeshConnectivityView m_connectivity_view; + m_connectivity_view.setMesh(this->mesh()); + auto ncc = m_connectivity_view.nodeCell(); + auto cnc = m_connectivity_view.cellNode(); + Arcane::ItemGenericInfoListView nodes_infos(this->mesh()->nodeFamily()); + Arcane::ItemGenericInfoListView cells_infos(this->mesh()->cellFamily()); + + Timer::Action timer_nwcsr_add_compute(m_time_stats, "NodeWiseCsrAddAndCompute"); + command << RUNCOMMAND_ENUMERATE(Node, inode, allNodes()) + { + for (auto cell : ncc.cells(inode)) { + Int32 inode_index = 0; + for (Int32 i = 0; i < 4; ++i) { + if (inode == cnc.nodeId(cell, i)) { + inode_index = i; + break; + } + } + + Real3 m0 = in_node_coord[cnc.nodeId(cell, 0)]; + Real3 m1 = in_node_coord[cnc.nodeId(cell, 1)]; + Real3 m2 = in_node_coord[cnc.nodeId(cell, 2)]; + Real3 m3 = in_node_coord[cnc.nodeId(cell, 3)]; + + Real3 v0 = m1 - m0; + Real3 v1 = m2 - m0; + Real3 v2 = m3 - m0; + Real volume = std::abs(Arcane::math::dot(v0, Arcane::math::cross(v1, v2))) / 6.0; + + Real3 dPhi[4] = { + Arcane::math::cross(m2 - m1, m1 - m3), + Arcane::math::cross(m3 - m0, m0 - m2), + Arcane::math::cross(m1 - m0, m0 - m3), + Arcane::math::cross(m0 - m1, m1 - m2) + }; + + Real b_matrix[4][3] = { 0 }; + Real mul = (1.0 / (6.0 * volume)); + for (Int32 i = 0; i < 4; ++i) { + b_matrix[i][0] = dPhi[i].x * mul; + b_matrix[i][1] = dPhi[i].y * mul; + b_matrix[i][2] = dPhi[i].z * mul; + } + + for (Int32 i = 0; i < 4; ++i) { + Real x = 0.0; + for (Int32 k = 0; k < 3; ++k) { + x += b_matrix[inode_index][k] * b_matrix[i][k]; + } + if (nodes_infos.isOwn(inode)) { + Int32 row = node_dof.dofId(inode, 0).localId(); + Int32 col = node_dof.dofId(cnc.nodeId(cell, i), 0).localId(); + Int32 begin = in_row_csr[row]; + Int32 end = (row == row_csr_size - 1) ? col_csr_size : in_row_csr[row + 1]; + while (begin < end) { + if (in_col_csr[begin] == col) { + in_out_val_csr[begin] += x * volume; + break; + } + ++begin; + } + } + } + } + }; +} + +/*---------------------------------------------------------------------------*/ +/*---------------------------------------------------------------------------*/ diff --git a/poisson/Test.L-shape.2D.nwcsr.arc b/poisson/Test.L-shape.2D.nwcsr.arc new file mode 100644 index 0000000..3584840 --- /dev/null +++ b/poisson/Test.L-shape.2D.nwcsr.arc @@ -0,0 +1,31 @@ + + + + L-shape 2D with CSR sparse matrix format Gpu compatible and node wise technique. The result of this test is compared with test_poisson_results.txt + PoissonLoop + + + + 1 + + + U + + + + + + L-shape.msh + + + + + true + test_poisson_results.txt + -1.0 + + boundary + 0.0 + + + diff --git a/poisson/Test.L-shape.3D.nwcsr.arc b/poisson/Test.L-shape.3D.nwcsr.arc new file mode 100644 index 0000000..ba854da --- /dev/null +++ b/poisson/Test.L-shape.3D.nwcsr.arc @@ -0,0 +1,39 @@ + + + + L-shape 3D with CSR sparse matrix format Gpu compatible and node wise technique. The result of this test is compared with test_poisson_results.txt + PoissonLoop + + + + 1 + + + U + + + + + + L-shape-3D.msh + + + + + true + 1.0 + test_3D_L-shape_poisson.txt + TETRA4 + + bot + 50.0 + + + bc + 10.0 + + + petsc + + +