Skip to content

Commit

Permalink
feat[poisson]: Support 3D mesh with CSR GPU + tests
Browse files Browse the repository at this point in the history
  • Loading branch information
toutane committed Oct 7, 2024
1 parent 716c082 commit bbabecf
Show file tree
Hide file tree
Showing 5 changed files with 244 additions and 24 deletions.
2 changes: 2 additions & 0 deletions poisson/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ configure_file(Test.L-shape.3D.arc ${CMAKE_CURRENT_BINARY_DIR} COPYONLY)
configure_file(Test.sphere.3D.arc ${CMAKE_CURRENT_BINARY_DIR} COPYONLY)
configure_file(Test.sphere.3D.csr.arc ${CMAKE_CURRENT_BINARY_DIR} COPYONLY)
configure_file(Test.sphere.3D.csr.no-edge.arc ${CMAKE_CURRENT_BINARY_DIR} COPYONLY)
configure_file(Test.sphere.3D.csr-gpu.arc ${CMAKE_CURRENT_BINARY_DIR} COPYONLY)
configure_file(Test.direct-solver.arc ${CMAKE_CURRENT_BINARY_DIR} COPYONLY)
configure_file(Test.neumann.arc ${CMAKE_CURRENT_BINARY_DIR} COPYONLY)
configure_file(Test.porous.arc ${CMAKE_CURRENT_BINARY_DIR} COPYONLY)
Expand Down Expand Up @@ -94,6 +95,7 @@ endif()


arcanefem_add_gpu_test(NAME [poisson]gpu COMMAND ./Poisson ARGS Test.petsc.arc)
arcanefem_add_gpu_test(NAME [poisson]hypre_direct_3D_csr-gpu COMMAND ./Poisson ARGS Test.sphere.3D.csr-gpu.arc)
if(FEMUTILS_HAS_SOLVER_BACKEND_HYPRE)
arcanefem_add_gpu_test(NAME [poisson]hypre_direct_gpu COMMAND ./Poisson ARGS Test.hypre_direct.arc)
arcanefem_add_gpu_test(NAME [poisson]hypre_direct_3D_gpu COMMAND ./Poisson ARGS Test.sphere.3D.arc)
Expand Down
131 changes: 110 additions & 21 deletions poisson/CsrGpuBiliAssembly.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,33 +14,20 @@

#include "FemModule.h"

/**
* @brief Initialization of the csr matrix. It only works for p=1 since there is
* one node per Edge. Currently, there is no difference between buildMatrixCsr and this method.
*
*
*/
/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/

void FemModule::
_buildMatrixCsrGPU()
{
//Initialization of the csr matrix;
//This formula only works in p=1

/*
//Create a connection between nodes through the faces
//Useless here because we only need this information once
IItemFamily* node_family = mesh()->nodeFamily();
NodeGroup nodes = node_family->allItems();
auto idx_cn = mesh()->indexedConnectivityMng()->findOrCreateConnectivity(node_family, node_family, "NodeToNeighbourFaceNodes");
auto* cn = idx_cn->connectivity();
ENUMERATE_NODE (node, allNodes()) {
}
*/
auto node_dof(m_dofs_on_nodes.nodeDoFConnectivityView());

Int32 nnz = (options()->meshType == "TRIA3" ? nbFace() : nbEdge() * 2) + nbNode();

Int32 nnz = nbFace() * 2 + nbNode();
m_csr_matrix.initialize(m_dof_family, nnz, nbNode());
auto node_dof(m_dofs_on_nodes.nodeDoFConnectivityView());

//We iterate through the node, and we do not sort anymore : we assume the nodes ID are sorted, and we will iterate throught the column to avoid making < and > comparison
if (options()->meshType == "TRIA3") {
ENUMERATE_NODE (inode, allNodes()) {
Node node = *inode;
Int32 node_dof_id = node_dof.dofId(node, 0);
Expand All @@ -55,9 +42,36 @@ _buildMatrixCsrGPU()
m_csr_matrix.setCoordinates(diagonal_entry, node_dof.dofId(face.nodeId(0), 0));
}
}
}
else if (options()->meshType == "TETRA4") {
ENUMERATE_NODE (inode, allNodes()) {
Node node = *inode;
Int32 node_dof_id = node_dof.dofId(node, 0);
ItemLocalIdT<DoF> diagonal_entry(node_dof_id);

m_csr_matrix.setCoordinates(diagonal_entry, diagonal_entry);

for (Edge edge : node.edges()) {
if (edge.nodeId(0) == node.localId())
m_csr_matrix.setCoordinates(diagonal_entry, node_dof.dofId(edge.nodeId(1), 0));
else
m_csr_matrix.setCoordinates(diagonal_entry, node_dof.dofId(edge.nodeId(0), 0));
}
}
}
}

/*---------------------------------------------------------------------------*/
/**
* @brief Assembles the bilinear operator matrix for the FEM linear system with
* the CSR sparse matrix format for TRIA3 elements.
*
* The method performs the following steps:
* 1. Builds the CSR matrix using _buildMatrixCsrGPU.
* 2. For each cell, computes element matrix using _computeElementMatrixTRIA3GPU.
* 3. Assembles global matrix by adding contributions from each cell's element
* matrix to the corresponding entries in the global matrix.
*/
/*---------------------------------------------------------------------------*/

void FemModule::
Expand Down Expand Up @@ -136,4 +150,79 @@ _assembleCsrGPUBilinearOperatorTRIA3()
}

/*---------------------------------------------------------------------------*/
/**
* @brief Assembles the bilinear operator matrix for the FEM linear system with
* the CSR sparse matrix format for TETRA4 elements.
*
* The method performs the following steps:
* 1. Builds the CSR matrix using _buildMatrixCsrGPU.
* 2. For each cell, computes element matrix using _computeElementMatrixTETRA4GPU.
* 3. Assembles global matrix by adding contributions from each cell's element
* matrix to the corresponding entries in the global matrix.
*/
/*---------------------------------------------------------------------------*/

void FemModule::
_assembleCsrGPUBilinearOperatorTETRA4()
{
Timer::Action timer_gpu_bili(m_time_stats, "AssembleCsrGpuBilinearOperatorTetra4");

{
Timer::Action timer_gpu_build(m_time_stats, "CsrGpuBuildMatrix");
_buildMatrixCsrGPU();
}

RunQueue* queue = acceleratorMng()->defaultQueue();
auto command = makeCommand(queue);

auto node_dof(m_dofs_on_nodes.nodeDoFConnectivityView());
auto in_row_csr = ax::viewIn(command, m_csr_matrix.m_matrix_row);
Int32 row_csr_size = m_csr_matrix.m_matrix_row.dim1Size();
auto in_col_csr = ax::viewIn(command, m_csr_matrix.m_matrix_column);
Int32 col_csr_size = m_csr_matrix.m_matrix_column.dim1Size();
auto in_out_val_csr = ax::viewInOut(command, m_csr_matrix.m_matrix_value);
UnstructuredMeshConnectivityView m_connectivity_view;
auto in_node_coord = ax::viewIn(command, m_node_coord);
m_connectivity_view.setMesh(this->mesh());
auto cnc = m_connectivity_view.cellNode();
Arcane::ItemGenericInfoListView nodes_infos(this->mesh()->nodeFamily());

Timer::Action timer_add_compute(m_time_stats, "CsrGpuAddComputeLoop");

command << RUNCOMMAND_ENUMERATE(Cell, icell, allCells())
{

Real K_e[16] = { 0 };

_computeElementMatrixTETRA4GPU(icell, cnc, in_node_coord, K_e);

Int32 n1_index = 0;
for (NodeLocalId node1 : cnc.nodes(icell)) {
Int32 n2_index = 0;
for (NodeLocalId node2 : cnc.nodes(icell)) {
double v = K_e[n1_index * 4 + n2_index];

if (nodes_infos.isOwn(node1)) {

Int32 row = node_dof.dofId(node1, 0).localId();
Int32 col = node_dof.dofId(node2, 0).localId();
Int32 begin = in_row_csr[row];
Int32 end = (row == row_csr_size - 1) ? col_csr_size : in_row_csr[row + 1];

while (begin < end) {
if (in_col_csr[begin] == col) {
ax::doAtomic<ax::eAtomicOperation::Add>(in_out_val_csr(begin), v);
break;
}
begin++;
}
}
++n2_index;
}
++n1_index;
}
};
}

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/
11 changes: 9 additions & 2 deletions poisson/FemModule.cc
Original file line number Diff line number Diff line change
Expand Up @@ -456,12 +456,19 @@ _doStationarySolve()
#ifdef ARCANE_HAS_ACCELERATOR
if (m_use_csr_gpu) {
m_linear_system.clearValues();
_assembleCsrGPUBilinearOperatorTRIA3();
if (options()->meshType == "TRIA3")
_assembleCsrGPUBilinearOperatorTRIA3();
if (options()->meshType == "TETRA4")
_assembleCsrGPUBilinearOperatorTETRA4();
if (m_cache_warming != 1) {
m_time_stats->resetStats("AssembleCsrGpuBilinearOperatorTria3");
m_time_stats->resetStats("AssembleCsrGpuBilinearOperatorTetra4");
for (cache_index = 1; cache_index < m_cache_warming; cache_index++) {
m_linear_system.clearValues();
_assembleCsrGPUBilinearOperatorTRIA3();
if (options()->meshType == "TRIA3")
_assembleCsrGPUBilinearOperatorTRIA3();
if (options()->meshType == "TETRA4")
_assembleCsrGPUBilinearOperatorTETRA4();
}
}
m_csr_matrix.translateToLinearSystem(m_linear_system);
Expand Down
81 changes: 80 additions & 1 deletion poisson/FemModule.h
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,7 @@ class FemModule

void _applyDirichletBoundaryConditionsGpu();
void _assembleCsrGpuLinearOperator();

static ARCCORE_HOST_DEVICE Int32
_getValIndexCsrGpu(Int32 begin, Int32 end, DoFLocalId col, ax::NumArrayView<DataViewGetter<Int32>, MDDim1, DefaultLayout> csr_col);

Expand Down Expand Up @@ -255,13 +256,18 @@ class FemModule
_computeElementMatrixTRIA3GPU(CellLocalId icell, IndexedCellNodeConnectivityView cnc,
ax::VariableNodeReal3InView in_node_coord, Real K_e[9]);

static ARCCORE_HOST_DEVICE inline void
_computeElementMatrixTETRA4GPU(CellLocalId icell, IndexedCellNodeConnectivityView cnc,
ax::VariableNodeReal3InView in_node_coord, Real K_e[12]);

private:


public:

void _buildMatrixCsrGPU();
void _assembleCsrGPUBilinearOperatorTRIA3();
void _assembleCsrGPUBilinearOperatorTETRA4();

private:

Expand Down Expand Up @@ -364,4 +370,77 @@ _computeElementMatrixTRIA3GPU(CellLocalId icell, IndexedCellNodeConnectivityView
//return int_cdPi_dPj;
}

#endif
/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/

ARCCORE_HOST_DEVICE inline void FemModule::
_computeElementMatrixTETRA4GPU(CellLocalId icell, IndexedCellNodeConnectivityView cnc,
ax::VariableNodeReal3InView in_node_coord, Real K_e[16])
{
// Get coordinates of the triangle element TETRA4
//------------------------------------------------
// 3 o
// /|\
// / | \
// / | \
// / o 2 \
// / . . \
// o-----------o
// 0 1
//------------------------------------------------
Real3 m0 = in_node_coord[cnc.nodeId(icell, 0)];
Real3 m1 = in_node_coord[cnc.nodeId(icell, 1)];
Real3 m2 = in_node_coord[cnc.nodeId(icell, 2)];
Real3 m3 = in_node_coord[cnc.nodeId(icell, 3)];

Real3 v0 = m1 - m0;
Real3 v1 = m2 - m0;
Real3 v2 = m3 - m0;

Real volume = std::abs(Arcane::math::dot(v0, Arcane::math::cross(v1, v2))) / 6.0;

// Compute gradients of shape functions
Real3 dPhi0 = Arcane::math::cross(m2 - m1, m1 - m3);
Real3 dPhi1 = Arcane::math::cross(m3 - m0, m0 - m2);
Real3 dPhi2 = Arcane::math::cross(m1 - m0, m0 - m3);
Real3 dPhi3 = Arcane::math::cross(m0 - m1, m1 - m2);

Real mul = 1.0 / (6.0 * volume);

// Construct the B-matrix
Real b_matrix[3][4];
b_matrix[0][0] = dPhi0.x;
b_matrix[1][0] = dPhi0.y;
b_matrix[2][0] = dPhi0.z;

b_matrix[0][1] = dPhi1.x;
b_matrix[1][1] = dPhi1.y;
b_matrix[2][1] = dPhi1.z;

b_matrix[0][2] = dPhi2.x;
b_matrix[1][2] = dPhi2.y;
b_matrix[2][2] = dPhi2.z;

b_matrix[0][3] = dPhi3.x;
b_matrix[1][3] = dPhi3.y;
b_matrix[2][3] = dPhi3.z;

for (Int32 i = 0; i < 3; ++i)
for (Int32 j = 0; j < 4; ++j)
b_matrix[i][j] *= mul;

// Compute the element matrix
for (Int32 i = 0; i < 4; ++i) {
for (Int32 j = i; j < 4; ++j) {
for (Int32 k = 0; k < 3; ++k)
K_e[i * 4 + j] += b_matrix[k][i] * b_matrix[k][j];
K_e[i * 4 + j] *= volume;
K_e[j * 4 + i] = K_e[i * 4 + j];
}
}
}

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/

#endif
43 changes: 43 additions & 0 deletions poisson/Test.sphere.3D.csr-gpu.arc
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
<?xml version="1.0"?>
<case codename="Poisson" xml:lang="en" codeversion="1.0">
<arcane>
<title>Sphere 3D with CSR sparse matrix format Gpu compatible. The result of this test is compared with test_sphere_3D_results.txt</title>
<timeloop>PoissonLoop</timeloop>
</arcane>

<arcane-post-processing>
<output-period>1</output-period>
<format name="VtkHdfV2PostProcessor" />
<output>
<variable>U</variable>
</output>
</arcane-post-processing>

<meshes>
<mesh>
<filename>sphere_cut.msh</filename>
</mesh>
</meshes>

<fem>
<result-file>test_sphere_3D_results.txt</result-file>
<f>-0.01</f>
<mesh-type>TETRA4</mesh-type>
<enforce-Dirichlet-method>Penalty</enforce-Dirichlet-method>
<penalty>1.e31</penalty>
<dirichlet-boundary-condition>
<surface>sphere</surface>
<value>0.0</value>
</dirichlet-boundary-condition>
<dirichlet-boundary-condition>
<surface>Cut</surface>
<value>10.0</value>
</dirichlet-boundary-condition>
<linear-system name="HypreLinearSystem">
<rtol>0.</rtol>
<atol>1e-5</atol>
<amg-threshold>0.55</amg-threshold>
</linear-system>
<csr-gpu>true</csr-gpu>
</fem>
</case>

0 comments on commit bbabecf

Please sign in to comment.