Skip to content

Commit

Permalink
Merge pull request #167 from toutane/dev/cal-poisson-csr-gpu-3d-support
Browse files Browse the repository at this point in the history
feat[poisson]: Support 3D mesh with CSR GPU
  • Loading branch information
mohd-afeef-badri authored Oct 11, 2024
2 parents 689a80a + 301fc44 commit 1678104
Show file tree
Hide file tree
Showing 5 changed files with 168 additions and 23 deletions.
2 changes: 2 additions & 0 deletions poisson/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ configure_file(Test.L-shape.3D.coo-sort.arc ${CMAKE_CURRENT_BINARY_DIR} COPYONLY
configure_file(Test.sphere.3D.arc ${CMAKE_CURRENT_BINARY_DIR} COPYONLY)
configure_file(Test.sphere.3D.csr.arc ${CMAKE_CURRENT_BINARY_DIR} COPYONLY)
configure_file(Test.sphere.3D.csr.no-edge.arc ${CMAKE_CURRENT_BINARY_DIR} COPYONLY)
configure_file(Test.sphere.3D.csr-gpu.arc ${CMAKE_CURRENT_BINARY_DIR} COPYONLY)
configure_file(Test.direct-solver.arc ${CMAKE_CURRENT_BINARY_DIR} COPYONLY)
configure_file(Test.neumann.arc ${CMAKE_CURRENT_BINARY_DIR} COPYONLY)
configure_file(Test.porous.arc ${CMAKE_CURRENT_BINARY_DIR} COPYONLY)
Expand Down Expand Up @@ -105,6 +106,7 @@ endif()


arcanefem_add_gpu_test(NAME [poisson]gpu COMMAND ./Poisson ARGS Test.petsc.arc)
arcanefem_add_gpu_test(NAME [poisson]hypre_direct_3D_csr-gpu COMMAND ./Poisson ARGS Test.sphere.3D.csr-gpu.arc)
arcanefem_add_gpu_test(NAME [poisson]L-shape_2D_petsc_coo-gpu COMMAND ./Poisson ARGS Test.L-shape.2D.coo-gpu.arc)
arcanefem_add_gpu_test(NAME [poisson]L-shape_3D_petsc_coo-gpu COMMAND ./Poisson ARGS Test.L-shape.3D.coo-gpu.arc)
if(FEMUTILS_HAS_SOLVER_BACKEND_HYPRE)
Expand Down
131 changes: 110 additions & 21 deletions poisson/CsrGpuBiliAssembly.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,33 +14,20 @@

#include "FemModule.h"

/**
* @brief Initialization of the csr matrix. It only works for p=1 since there is
* one node per Edge. Currently, there is no difference between buildMatrixCsr and this method.
*
*
*/
/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/

void FemModule::
_buildMatrixCsrGPU()
{
//Initialization of the csr matrix;
//This formula only works in p=1

/*
//Create a connection between nodes through the faces
//Useless here because we only need this information once
IItemFamily* node_family = mesh()->nodeFamily();
NodeGroup nodes = node_family->allItems();
auto idx_cn = mesh()->indexedConnectivityMng()->findOrCreateConnectivity(node_family, node_family, "NodeToNeighbourFaceNodes");
auto* cn = idx_cn->connectivity();
ENUMERATE_NODE (node, allNodes()) {
}
*/
auto node_dof(m_dofs_on_nodes.nodeDoFConnectivityView());

Int32 nnz = (options()->meshType == "TRIA3" ? nbFace() : nbEdge() * 2) + nbNode();

Int32 nnz = nbFace() * 2 + nbNode();
m_csr_matrix.initialize(m_dof_family, nnz, nbNode());
auto node_dof(m_dofs_on_nodes.nodeDoFConnectivityView());

//We iterate through the node, and we do not sort anymore : we assume the nodes ID are sorted, and we will iterate throught the column to avoid making < and > comparison
if (options()->meshType == "TRIA3") {
ENUMERATE_NODE (inode, allNodes()) {
Node node = *inode;
Int32 node_dof_id = node_dof.dofId(node, 0);
Expand All @@ -55,9 +42,36 @@ _buildMatrixCsrGPU()
m_csr_matrix.setCoordinates(diagonal_entry, node_dof.dofId(face.nodeId(0), 0));
}
}
}
else if (options()->meshType == "TETRA4") {
ENUMERATE_NODE (inode, allNodes()) {
Node node = *inode;
Int32 node_dof_id = node_dof.dofId(node, 0);
ItemLocalIdT<DoF> diagonal_entry(node_dof_id);

m_csr_matrix.setCoordinates(diagonal_entry, diagonal_entry);

for (Edge edge : node.edges()) {
if (edge.nodeId(0) == node.localId())
m_csr_matrix.setCoordinates(diagonal_entry, node_dof.dofId(edge.nodeId(1), 0));
else
m_csr_matrix.setCoordinates(diagonal_entry, node_dof.dofId(edge.nodeId(0), 0));
}
}
}
}

/*---------------------------------------------------------------------------*/
/**
* @brief Assembles the bilinear operator matrix for the FEM linear system with
* the CSR sparse matrix format for TRIA3 elements.
*
* The method performs the following steps:
* 1. Builds the CSR matrix using _buildMatrixCsrGPU.
* 2. For each cell, computes element matrix using _computeElementMatrixTRIA3GPU.
* 3. Assembles global matrix by adding contributions from each cell's element
* matrix to the corresponding entries in the global matrix.
*/
/*---------------------------------------------------------------------------*/

void FemModule::
Expand Down Expand Up @@ -136,4 +150,79 @@ _assembleCsrGPUBilinearOperatorTRIA3()
}

/*---------------------------------------------------------------------------*/
/**
* @brief Assembles the bilinear operator matrix for the FEM linear system with
* the CSR sparse matrix format for TETRA4 elements.
*
* The method performs the following steps:
* 1. Builds the CSR matrix using _buildMatrixCsrGPU.
* 2. For each cell, computes element matrix using _computeElementMatrixTETRA4GPU.
* 3. Assembles global matrix by adding contributions from each cell's element
* matrix to the corresponding entries in the global matrix.
*/
/*---------------------------------------------------------------------------*/

void FemModule::
_assembleCsrGPUBilinearOperatorTETRA4()
{
Timer::Action timer_gpu_bili(m_time_stats, "AssembleCsrGpuBilinearOperatorTetra4");

{
Timer::Action timer_gpu_build(m_time_stats, "CsrGpuBuildMatrix");
_buildMatrixCsrGPU();
}

RunQueue* queue = acceleratorMng()->defaultQueue();
auto command = makeCommand(queue);

auto node_dof(m_dofs_on_nodes.nodeDoFConnectivityView());
auto in_row_csr = ax::viewIn(command, m_csr_matrix.m_matrix_row);
Int32 row_csr_size = m_csr_matrix.m_matrix_row.dim1Size();
auto in_col_csr = ax::viewIn(command, m_csr_matrix.m_matrix_column);
Int32 col_csr_size = m_csr_matrix.m_matrix_column.dim1Size();
auto in_out_val_csr = ax::viewInOut(command, m_csr_matrix.m_matrix_value);
UnstructuredMeshConnectivityView m_connectivity_view;
auto in_node_coord = ax::viewIn(command, m_node_coord);
m_connectivity_view.setMesh(this->mesh());
auto cnc = m_connectivity_view.cellNode();
Arcane::ItemGenericInfoListView nodes_infos(this->mesh()->nodeFamily());

Timer::Action timer_add_compute(m_time_stats, "CsrGpuAddComputeLoop");

command << RUNCOMMAND_ENUMERATE(Cell, icell, allCells())
{

Real K_e[16] = { 0 };

_computeElementMatrixTETRA4GPU(icell, cnc, in_node_coord, K_e);

Int32 n1_index = 0;
for (NodeLocalId node1 : cnc.nodes(icell)) {
Int32 n2_index = 0;
for (NodeLocalId node2 : cnc.nodes(icell)) {
double v = K_e[n1_index * 4 + n2_index];

if (nodes_infos.isOwn(node1)) {

Int32 row = node_dof.dofId(node1, 0).localId();
Int32 col = node_dof.dofId(node2, 0).localId();
Int32 begin = in_row_csr[row];
Int32 end = (row == row_csr_size - 1) ? col_csr_size : in_row_csr[row + 1];

while (begin < end) {
if (in_col_csr[begin] == col) {
ax::doAtomic<ax::eAtomicOperation::Add>(in_out_val_csr(begin), v);
break;
}
begin++;
}
}
++n2_index;
}
++n1_index;
}
};
}

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/
11 changes: 9 additions & 2 deletions poisson/FemModule.cc
Original file line number Diff line number Diff line change
Expand Up @@ -489,12 +489,19 @@ _doStationarySolve()

if (m_use_csr_gpu) {
m_linear_system.clearValues();
_assembleCsrGPUBilinearOperatorTRIA3();
if (options()->meshType == "TRIA3")
_assembleCsrGPUBilinearOperatorTRIA3();
if (options()->meshType == "TETRA4")
_assembleCsrGPUBilinearOperatorTETRA4();
if (m_cache_warming != 1) {
m_time_stats->resetStats("AssembleCsrGpuBilinearOperatorTria3");
m_time_stats->resetStats("AssembleCsrGpuBilinearOperatorTetra4");
for (cache_index = 1; cache_index < m_cache_warming; cache_index++) {
m_linear_system.clearValues();
_assembleCsrGPUBilinearOperatorTRIA3();
if (options()->meshType == "TRIA3")
_assembleCsrGPUBilinearOperatorTRIA3();
if (options()->meshType == "TETRA4")
_assembleCsrGPUBilinearOperatorTETRA4();
}
}
m_csr_matrix.translateToLinearSystem(m_linear_system);
Expand Down
4 changes: 4 additions & 0 deletions poisson/FemModule.h
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,7 @@ class FemModule

void _applyDirichletBoundaryConditionsGpu();
void _assembleCsrGpuLinearOperator();

static ARCCORE_HOST_DEVICE Int32
_getValIndexCsrGpu(Int32 begin, Int32 end, DoFLocalId col, ax::NumArrayView<DataViewGetter<Int32>, MDDim1, DefaultLayout> csr_col);

Expand Down Expand Up @@ -262,11 +263,13 @@ class FemModule
_computeElementMatrixTETRA4GPU(CellLocalId icell, IndexedCellNodeConnectivityView cnc,
ax::VariableNodeReal3InView in_node_coord, Real K_e[16]);


private:
public:

void _buildMatrixCsrGPU();
void _assembleCsrGPUBilinearOperatorTRIA3();
void _assembleCsrGPUBilinearOperatorTETRA4();

private:
public:
Expand Down Expand Up @@ -437,6 +440,7 @@ _computeElementMatrixTETRA4GPU(CellLocalId icell, IndexedCellNodeConnectivityVie
}
}
}

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/

Expand Down
43 changes: 43 additions & 0 deletions poisson/Test.sphere.3D.csr-gpu.arc
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
<?xml version="1.0"?>
<case codename="Poisson" xml:lang="en" codeversion="1.0">
<arcane>
<title>Sphere 3D with CSR sparse matrix format Gpu compatible. The result of this test is compared with test_sphere_3D_results.txt</title>
<timeloop>PoissonLoop</timeloop>
</arcane>

<arcane-post-processing>
<output-period>1</output-period>
<format name="VtkHdfV2PostProcessor" />
<output>
<variable>U</variable>
</output>
</arcane-post-processing>

<meshes>
<mesh>
<filename>sphere_cut.msh</filename>
</mesh>
</meshes>

<fem>
<result-file>test_sphere_3D_results.txt</result-file>
<f>-0.01</f>
<mesh-type>TETRA4</mesh-type>
<enforce-Dirichlet-method>Penalty</enforce-Dirichlet-method>
<penalty>1.e31</penalty>
<dirichlet-boundary-condition>
<surface>sphere</surface>
<value>0.0</value>
</dirichlet-boundary-condition>
<dirichlet-boundary-condition>
<surface>Cut</surface>
<value>10.0</value>
</dirichlet-boundary-condition>
<linear-system name="HypreLinearSystem">
<rtol>0.</rtol>
<atol>1e-5</atol>
<amg-threshold>0.55</amg-threshold>
</linear-system>
<csr-gpu>true</csr-gpu>
</fem>
</case>

0 comments on commit 1678104

Please sign in to comment.