diff --git a/poisson/CMakeLists.txt b/poisson/CMakeLists.txt
index 0985e24..0595383 100644
--- a/poisson/CMakeLists.txt
+++ b/poisson/CMakeLists.txt
@@ -37,6 +37,7 @@ configure_file(Test.L-shape.3D.coo-sort.arc ${CMAKE_CURRENT_BINARY_DIR} COPYONLY
 configure_file(Test.sphere.3D.arc ${CMAKE_CURRENT_BINARY_DIR} COPYONLY)
 configure_file(Test.sphere.3D.csr.arc ${CMAKE_CURRENT_BINARY_DIR} COPYONLY)
 configure_file(Test.sphere.3D.csr.no-edge.arc ${CMAKE_CURRENT_BINARY_DIR} COPYONLY)
+configure_file(Test.sphere.3D.csr-gpu.arc ${CMAKE_CURRENT_BINARY_DIR} COPYONLY)
 configure_file(Test.direct-solver.arc ${CMAKE_CURRENT_BINARY_DIR} COPYONLY)
 configure_file(Test.neumann.arc ${CMAKE_CURRENT_BINARY_DIR} COPYONLY)
 configure_file(Test.porous.arc ${CMAKE_CURRENT_BINARY_DIR} COPYONLY)
@@ -105,6 +106,7 @@ endif()
 
 
 arcanefem_add_gpu_test(NAME [poisson]gpu COMMAND ./Poisson ARGS Test.petsc.arc)
+arcanefem_add_gpu_test(NAME [poisson]hypre_direct_3D_csr-gpu COMMAND ./Poisson ARGS Test.sphere.3D.csr-gpu.arc)
 arcanefem_add_gpu_test(NAME [poisson]L-shape_2D_petsc_coo-gpu COMMAND ./Poisson ARGS Test.L-shape.2D.coo-gpu.arc)
 arcanefem_add_gpu_test(NAME [poisson]L-shape_3D_petsc_coo-gpu COMMAND ./Poisson ARGS Test.L-shape.3D.coo-gpu.arc)
 if(FEMUTILS_HAS_SOLVER_BACKEND_HYPRE)
diff --git a/poisson/CsrGpuBiliAssembly.cc b/poisson/CsrGpuBiliAssembly.cc
index 48dc341..8d3ec0e 100644
--- a/poisson/CsrGpuBiliAssembly.cc
+++ b/poisson/CsrGpuBiliAssembly.cc
@@ -14,33 +14,20 @@
 
 #include "FemModule.h"
 
-/**
- * @brief Initialization of the csr matrix. It only works for p=1 since there is
- * one node per Edge. Currently, there is no difference between buildMatrixCsr and this method.
- * 
- * 
- */
+/*---------------------------------------------------------------------------*/
+/*---------------------------------------------------------------------------*/
+
 void FemModule::
 _buildMatrixCsrGPU()
 {
-  //Initialization of the csr matrix;
-  //This formula only works in p=1
-
-  /*
-  //Create a connection between nodes through the faces
-  //Useless here because we only need this information once
-  IItemFamily* node_family = mesh()->nodeFamily();
-  NodeGroup nodes = node_family->allItems();
-  auto idx_cn = mesh()->indexedConnectivityMng()->findOrCreateConnectivity(node_family, node_family, "NodeToNeighbourFaceNodes");
-  auto* cn = idx_cn->connectivity();
-  ENUMERATE_NODE (node, allNodes()) {
-  }
-  */
+  auto node_dof(m_dofs_on_nodes.nodeDoFConnectivityView());
+
+  Int32 nnz = (options()->meshType == "TRIA3" ? nbFace() : nbEdge() * 2) + nbNode();
 
-  Int32 nnz = nbFace() * 2 + nbNode();
   m_csr_matrix.initialize(m_dof_family, nnz, nbNode());
-  auto node_dof(m_dofs_on_nodes.nodeDoFConnectivityView());
+
   //We iterate through the node, and we do not sort anymore : we assume the nodes ID are sorted, and we will iterate throught the column to avoid making < and > comparison
+  if (options()->meshType == "TRIA3") {
   ENUMERATE_NODE (inode, allNodes()) {
     Node node = *inode;
     Int32 node_dof_id = node_dof.dofId(node, 0);
@@ -55,9 +42,36 @@ _buildMatrixCsrGPU()
         m_csr_matrix.setCoordinates(diagonal_entry, node_dof.dofId(face.nodeId(0), 0));
     }
   }
+  }
+  else if (options()->meshType == "TETRA4") {
+  ENUMERATE_NODE (inode, allNodes()) {
+    Node node = *inode;
+    Int32 node_dof_id = node_dof.dofId(node, 0);
+    ItemLocalIdT<DoF> diagonal_entry(node_dof_id);
+
+    m_csr_matrix.setCoordinates(diagonal_entry, diagonal_entry);
+
+    for (Edge edge : node.edges()) {
+      if (edge.nodeId(0) == node.localId())
+        m_csr_matrix.setCoordinates(diagonal_entry, node_dof.dofId(edge.nodeId(1), 0));
+      else
+        m_csr_matrix.setCoordinates(diagonal_entry, node_dof.dofId(edge.nodeId(0), 0));
+    }
+  }
+  }
 }
 
 /*---------------------------------------------------------------------------*/
+/**
+ * @brief Assembles the bilinear operator matrix for the FEM linear system with
+ * the CSR sparse matrix format for TRIA3 elements.
+ *
+ * The method performs the following steps:
+ *   1. Builds the CSR matrix using _buildMatrixCsrGPU.
+ *   2. For each cell, computes element matrix using _computeElementMatrixTRIA3GPU.
+ *   3. Assembles global matrix by adding contributions from each cell's element 
+ *      matrix to the corresponding entries in the global matrix.
+ */
 /*---------------------------------------------------------------------------*/
 
 void FemModule::
@@ -136,4 +150,79 @@ _assembleCsrGPUBilinearOperatorTRIA3()
 }
 
 /*---------------------------------------------------------------------------*/
+/**
+ * @brief Assembles the bilinear operator matrix for the FEM linear system with
+ * the CSR sparse matrix format for TETRA4 elements.
+ *
+ * The method performs the following steps:
+ *   1. Builds the CSR matrix using _buildMatrixCsrGPU.
+ *   2. For each cell, computes element matrix using _computeElementMatrixTETRA4GPU.
+ *   3. Assembles global matrix by adding contributions from each cell's element 
+ *      matrix to the corresponding entries in the global matrix.
+ */
+/*---------------------------------------------------------------------------*/
+
+void FemModule::
+_assembleCsrGPUBilinearOperatorTETRA4()
+{
+  Timer::Action timer_gpu_bili(m_time_stats, "AssembleCsrGpuBilinearOperatorTetra4");
+
+  {
+    Timer::Action timer_gpu_build(m_time_stats, "CsrGpuBuildMatrix");
+    _buildMatrixCsrGPU();
+  }
+
+  RunQueue* queue = acceleratorMng()->defaultQueue();
+  auto command = makeCommand(queue);
+
+  auto node_dof(m_dofs_on_nodes.nodeDoFConnectivityView());
+  auto in_row_csr = ax::viewIn(command, m_csr_matrix.m_matrix_row);
+  Int32 row_csr_size = m_csr_matrix.m_matrix_row.dim1Size();
+  auto in_col_csr = ax::viewIn(command, m_csr_matrix.m_matrix_column);
+  Int32 col_csr_size = m_csr_matrix.m_matrix_column.dim1Size();
+  auto in_out_val_csr = ax::viewInOut(command, m_csr_matrix.m_matrix_value);
+  UnstructuredMeshConnectivityView m_connectivity_view;
+  auto in_node_coord = ax::viewIn(command, m_node_coord);
+  m_connectivity_view.setMesh(this->mesh());
+  auto cnc = m_connectivity_view.cellNode();
+  Arcane::ItemGenericInfoListView nodes_infos(this->mesh()->nodeFamily());
+
+  Timer::Action timer_add_compute(m_time_stats, "CsrGpuAddComputeLoop");
+
+  command << RUNCOMMAND_ENUMERATE(Cell, icell, allCells())
+  {
+
+    Real K_e[16] = { 0 };
+
+    _computeElementMatrixTETRA4GPU(icell, cnc, in_node_coord, K_e);
+
+    Int32 n1_index = 0;
+    for (NodeLocalId node1 : cnc.nodes(icell)) {
+      Int32 n2_index = 0;
+      for (NodeLocalId node2 : cnc.nodes(icell)) {
+        double v = K_e[n1_index * 4 + n2_index];
+
+        if (nodes_infos.isOwn(node1)) {
+
+          Int32 row = node_dof.dofId(node1, 0).localId();
+          Int32 col = node_dof.dofId(node2, 0).localId();
+          Int32 begin = in_row_csr[row];
+          Int32 end = (row == row_csr_size - 1) ? col_csr_size : in_row_csr[row + 1];
+
+          while (begin < end) {
+            if (in_col_csr[begin] == col) {
+              ax::doAtomic<ax::eAtomicOperation::Add>(in_out_val_csr(begin), v);
+              break;
+            }
+            begin++;
+          }
+        }
+        ++n2_index;
+      }
+      ++n1_index;
+    }
+  };
+}
+
 /*---------------------------------------------------------------------------*/
+/*---------------------------------------------------------------------------*/
\ No newline at end of file
diff --git a/poisson/FemModule.cc b/poisson/FemModule.cc
index b910dbb..fd10682 100644
--- a/poisson/FemModule.cc
+++ b/poisson/FemModule.cc
@@ -489,12 +489,19 @@ _doStationarySolve()
 
   if (m_use_csr_gpu) {
     m_linear_system.clearValues();
-    _assembleCsrGPUBilinearOperatorTRIA3();
+    if (options()->meshType == "TRIA3")
+      _assembleCsrGPUBilinearOperatorTRIA3();
+    if (options()->meshType == "TETRA4")
+      _assembleCsrGPUBilinearOperatorTETRA4();
     if (m_cache_warming != 1) {
       m_time_stats->resetStats("AssembleCsrGpuBilinearOperatorTria3");
+      m_time_stats->resetStats("AssembleCsrGpuBilinearOperatorTetra4");
       for (cache_index = 1; cache_index < m_cache_warming; cache_index++) {
         m_linear_system.clearValues();
-        _assembleCsrGPUBilinearOperatorTRIA3();
+        if (options()->meshType == "TRIA3")
+          _assembleCsrGPUBilinearOperatorTRIA3();
+        if (options()->meshType == "TETRA4")
+          _assembleCsrGPUBilinearOperatorTETRA4();
       }
     }
      m_csr_matrix.translateToLinearSystem(m_linear_system);
diff --git a/poisson/FemModule.h b/poisson/FemModule.h
index a1335d1..7bfbb38 100644
--- a/poisson/FemModule.h
+++ b/poisson/FemModule.h
@@ -224,6 +224,7 @@ class FemModule
 
   void _applyDirichletBoundaryConditionsGpu();
   void _assembleCsrGpuLinearOperator();
+
   static ARCCORE_HOST_DEVICE Int32
   _getValIndexCsrGpu(Int32 begin, Int32 end, DoFLocalId col, ax::NumArrayView<DataViewGetter<Int32>, MDDim1, DefaultLayout> csr_col);
 
@@ -262,11 +263,13 @@ class FemModule
   _computeElementMatrixTETRA4GPU(CellLocalId icell, IndexedCellNodeConnectivityView cnc,
                                  ax::VariableNodeReal3InView in_node_coord, Real K_e[16]);
 
+
  private:
  public:
 
   void _buildMatrixCsrGPU();
   void _assembleCsrGPUBilinearOperatorTRIA3();
+  void _assembleCsrGPUBilinearOperatorTETRA4();
 
  private:
  public:
@@ -437,6 +440,7 @@ _computeElementMatrixTETRA4GPU(CellLocalId icell, IndexedCellNodeConnectivityVie
     }
   }
 }
+
 /*---------------------------------------------------------------------------*/
 /*---------------------------------------------------------------------------*/
 
diff --git a/poisson/Test.sphere.3D.csr-gpu.arc b/poisson/Test.sphere.3D.csr-gpu.arc
new file mode 100644
index 0000000..5c45376
--- /dev/null
+++ b/poisson/Test.sphere.3D.csr-gpu.arc
@@ -0,0 +1,43 @@
+<?xml version="1.0"?>
+<case codename="Poisson" xml:lang="en" codeversion="1.0">
+  <arcane>
+    <title>Sphere 3D with CSR sparse matrix format Gpu compatible. The result of this test is compared with test_sphere_3D_results.txt</title>
+    <timeloop>PoissonLoop</timeloop>
+  </arcane>
+
+  <arcane-post-processing>
+   <output-period>1</output-period>
+   <format name="VtkHdfV2PostProcessor" />
+   <output>
+     <variable>U</variable>
+   </output>
+  </arcane-post-processing>
+
+  <meshes>
+    <mesh>
+      <filename>sphere_cut.msh</filename>
+    </mesh>
+  </meshes>
+
+  <fem>
+    <result-file>test_sphere_3D_results.txt</result-file>
+    <f>-0.01</f>
+    <mesh-type>TETRA4</mesh-type>
+    <enforce-Dirichlet-method>Penalty</enforce-Dirichlet-method>
+    <penalty>1.e31</penalty>
+    <dirichlet-boundary-condition>
+      <surface>sphere</surface>
+      <value>0.0</value>
+    </dirichlet-boundary-condition>
+    <dirichlet-boundary-condition>
+      <surface>Cut</surface>
+      <value>10.0</value>
+    </dirichlet-boundary-condition>
+    <linear-system name="HypreLinearSystem">
+      <rtol>0.</rtol>
+      <atol>1e-5</atol>
+      <amg-threshold>0.55</amg-threshold>
+    </linear-system>
+    <csr-gpu>true</csr-gpu>
+  </fem>
+</case>