From 0aee4c568e544d6e963f604cb71c5ecfc65d2e55 Mon Sep 17 00:00:00 2001
From: Beka Barbakadze <beka.barbakadze@zama.ai>
Date: Wed, 13 Nov 2024 12:13:19 +0400
Subject: [PATCH] feat(gpu): add abs operation on gpu backend

---
 .../cuda/include/integer/integer.h            |  19 +++
 .../cuda/include/integer/integer_utilities.h  |  48 ++++++
 .../tfhe-cuda-backend/cuda/src/integer/abs.cu |  43 ++++++
 .../cuda/src/integer/abs.cuh                  |  69 +++++++++
 backends/tfhe-cuda-backend/src/bindings.rs    |  44 ++++++
 tfhe/benches/integer/signed_bench.rs          |  11 ++
 tfhe/src/integer/gpu/mod.rs                   |  65 ++++++++
 tfhe/src/integer/gpu/server_key/radix/abs.rs  | 142 ++++++++++++++++++
 tfhe/src/integer/gpu/server_key/radix/mod.rs  |   1 +
 .../gpu/server_key/radix/tests_signed/mod.rs  |   1 +
 .../server_key/radix/tests_signed/test_abs.rs |  27 ++++
 11 files changed, 470 insertions(+)
 create mode 100644 backends/tfhe-cuda-backend/cuda/src/integer/abs.cu
 create mode 100644 backends/tfhe-cuda-backend/cuda/src/integer/abs.cuh
 create mode 100644 tfhe/src/integer/gpu/server_key/radix/abs.rs
 create mode 100644 tfhe/src/integer/gpu/server_key/radix/tests_signed/test_abs.rs
diff --git a/backends/tfhe-cuda-backend/cuda/include/integer/integer.h b/backends/tfhe-cuda-backend/cuda/include/integer/integer.h
index a9990423fe..4de3dad896 100644
--- a/backends/tfhe-cuda-backend/cuda/include/integer/integer.h
+++ b/backends/tfhe-cuda-backend/cuda/include/integer/integer.h
@@ -417,5 +417,24 @@ void cuda_integer_reverse_blocks_64_inplace(void *const *streams,
                                             uint32_t num_blocks,
                                             uint32_t lwe_size);
 
+void scratch_cuda_integer_abs_inplace_radix_ciphertext_kb_64(
+    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
+    int8_t **mem_ptr, bool is_signed, uint32_t glwe_dimension,
+    uint32_t polynomial_size, uint32_t big_lwe_dimension,
+    uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
+    uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
+    uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
+    PBS_TYPE pbs_type, bool allocate_gpu_memory);
+
+void cuda_integer_abs_inplace_radix_ciphertext_kb_64(
+    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
+    void *ct, int8_t *mem_ptr, bool is_signed, void *const *bsks,
+    void *const *ksks, uint32_t num_blocks);
+
+void cleanup_cuda_integer_abs_inplace(void *const *streams,
+                                      uint32_t const *gpu_indexes,
+                                      uint32_t gpu_count,
+                                      int8_t **mem_ptr_void);
+
 } // extern C
 #endif // CUDA_INTEGER_H
diff --git a/backends/tfhe-cuda-backend/cuda/include/integer/integer_utilities.h b/backends/tfhe-cuda-backend/cuda/include/integer/integer_utilities.h
index 6dc85d4225..f925edadfb 100644
--- a/backends/tfhe-cuda-backend/cuda/include/integer/integer_utilities.h
+++ b/backends/tfhe-cuda-backend/cuda/include/integer/integer_utilities.h
@@ -3012,4 +3012,73 @@ template <typename Torus> struct int_scalar_mul_buffer {
   }
 };
 
+// Scratch memory for the homomorphic absolute value of a radix ciphertext:
+// one sub-buffer per operation applied with it (arithmetic scalar shift,
+// single carry propagation, bitwise XOR) and the device buffer `mask`.
+template <typename Torus> struct int_abs_buffer {
+  int_radix_params params;
+
+  // All owned pointers start as nullptr so that release() is well defined even
+  // when the constructor is called with allocate_gpu_memory == false.
+  int_arithmetic_scalar_shift_buffer<Torus> *arithmetic_scalar_shift_mem =
+      nullptr;
+  int_sc_prop_memory<Torus> *scp_mem = nullptr;
+  int_bitop_buffer<Torus> *bitxor_mem = nullptr;
+
+  // Device buffer sized for num_radix_blocks big LWE ciphertexts.
+  Torus *mask = nullptr;
+
+  int_abs_buffer(cudaStream_t const *streams, uint32_t const *gpu_indexes,
+                 uint32_t gpu_count, int_radix_params params,
+                 uint32_t num_radix_blocks, bool allocate_gpu_memory) {
+    this->params = params;
+
+    if (allocate_gpu_memory) {
+      arithmetic_scalar_shift_mem =
+          new int_arithmetic_scalar_shift_buffer<Torus>(
+              streams, gpu_indexes, gpu_count,
+              SHIFT_OR_ROTATE_TYPE::RIGHT_SHIFT, params, num_radix_blocks,
+              allocate_gpu_memory);
+      scp_mem =
+          new int_sc_prop_memory<Torus>(streams, gpu_indexes, gpu_count, params,
+                                        num_radix_blocks, allocate_gpu_memory);
+      bitxor_mem = new int_bitop_buffer<Torus>(
+          streams, gpu_indexes, gpu_count, BITOP_TYPE::BITXOR, params,
+          num_radix_blocks, allocate_gpu_memory);
+
+      // Computed in 64 bits so the total byte count cannot wrap around.
+      uint64_t lwe_size = params.big_lwe_dimension + 1;
+      uint64_t lwe_size_bytes = lwe_size * sizeof(Torus);
+
+      mask = (Torus *)cuda_malloc_async(num_radix_blocks * lwe_size_bytes,
+                                        streams[0], gpu_indexes[0]);
+    }
+  }
+
+  // Frees every resource owned by the buffer; members that were never
+  // allocated are skipped and released members are reset to nullptr.
+  void release(cudaStream_t const *streams, uint32_t const *gpu_indexes,
+               uint32_t gpu_count) {
+    if (arithmetic_scalar_shift_mem != nullptr) {
+      arithmetic_scalar_shift_mem->release(streams, gpu_indexes, gpu_count);
+      delete arithmetic_scalar_shift_mem;
+      arithmetic_scalar_shift_mem = nullptr;
+    }
+    if (scp_mem != nullptr) {
+      scp_mem->release(streams, gpu_indexes, gpu_count);
+      delete scp_mem;
+      scp_mem = nullptr;
+    }
+    if (bitxor_mem != nullptr) {
+      bitxor_mem->release(streams, gpu_indexes, gpu_count);
+      delete bitxor_mem;
+      bitxor_mem = nullptr;
+    }
+    if (mask != nullptr) {
+      cuda_drop_async(mask, streams[0], gpu_indexes[0]);
+      mask = nullptr;
+    }
+  }
+};
+
 #endif // CUDA_INTEGER_UTILITIES_H
diff --git a/backends/tfhe-cuda-backend/cuda/src/integer/abs.cu b/backends/tfhe-cuda-backend/cuda/src/integer/abs.cu
new file mode 100644
index 0000000000..1f462753ee
--- /dev/null
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/abs.cu
@@ -0,0 +1,60 @@
+#include "integer/abs.cuh"
+
+// Builds the radix parameter set from its scalar components and forwards to
+// scratch_cuda_integer_abs_kb, which allocates the int_abs_buffer stored in
+// *mem_ptr (only when is_signed is true).
+void scratch_cuda_integer_abs_inplace_radix_ciphertext_kb_64(
+    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
+    int8_t **mem_ptr, bool is_signed, uint32_t glwe_dimension,
+    uint32_t polynomial_size, uint32_t big_lwe_dimension,
+    uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
+    uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
+    uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
+    PBS_TYPE pbs_type, bool allocate_gpu_memory) {
+
+  int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
+                          big_lwe_dimension, small_lwe_dimension, ks_level,
+                          ks_base_log, pbs_level, pbs_base_log, grouping_factor,
+                          message_modulus, carry_modulus);
+
+  scratch_cuda_integer_abs_kb<uint64_t>(
+      (cudaStream_t *)(streams), gpu_indexes, gpu_count,
+      (int_abs_buffer<uint64_t> **)mem_ptr, is_signed, num_blocks, params,
+      allocate_gpu_memory);
+}
+
+// Runs the in-place absolute value on `ct` (num_blocks radix blocks) using
+// the buffer created by the scratch function above. Forwards to
+// host_integer_abs_kb, which returns immediately when is_signed is false.
+void cuda_integer_abs_inplace_radix_ciphertext_kb_64(
+    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
+    void *ct, int8_t *mem_ptr, bool is_signed, void *const *bsks,
+    void *const *ksks, uint32_t num_blocks) {
+
+  auto mem = (int_abs_buffer<uint64_t> *)mem_ptr;
+
+  host_integer_abs_kb<uint64_t>((cudaStream_t *)(streams), gpu_indexes,
+                                gpu_count, static_cast<uint64_t *>(ct), bsks,
+                                (uint64_t **)(ksks), mem, is_signed,
+                                num_blocks);
+}
+
+// Releases the buffer created by the scratch function and resets the caller's
+// handle so it cannot be reused or freed twice.
+void cleanup_cuda_integer_abs_inplace(void *const *streams,
+                                      uint32_t const *gpu_indexes,
+                                      uint32_t gpu_count,
+                                      int8_t **mem_ptr_void) {
+  int_abs_buffer<uint64_t> *mem_ptr =
+      (int_abs_buffer<uint64_t> *)(*mem_ptr_void);
+  // The scratch function allocates nothing for unsigned inputs, so a null
+  // handle is a valid "nothing to free" state.
+  if (mem_ptr == nullptr)
+    return;
+
+  mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
+  // release() frees what the buffer owns; the buffer object itself was
+  // allocated with `new` and must be deleted as well.
+  delete mem_ptr;
+  *mem_ptr_void = nullptr;
+}
diff --git a/backends/tfhe-cuda-backend/cuda/src/integer/abs.cuh b/backends/tfhe-cuda-backend/cuda/src/integer/abs.cuh
new file mode 100644
index 0000000000..ad1a4b9e23
--- /dev/null
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/abs.cuh
@@ -0,0 +1,84 @@
+#ifndef TFHE_RS_ABS_CUH
+#define TFHE_RS_ABS_CUH
+
+#include "crypto/keyswitch.cuh"
+#include "device.h"
+#include "integer/bitwise_ops.cuh"
+#include "integer/comparison.cuh"
+#include "integer/integer.cuh"
+#include "integer/integer_utilities.h"
+#include "integer/negation.cuh"
+#include "integer/scalar_shifts.cuh"
+#include "linear_algebra.h"
+#include "pbs/programmable_bootstrap.h"
+#include "utils/helper.cuh"
+#include "utils/kernel_dimensions.cuh"
+#include <fstream>
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <vector>
+
+// Allocates the scratch buffer used by host_integer_abs_kb.
+// host_integer_abs_kb is a no-op for unsigned inputs and needs no buffer, so
+// *mem_ptr is set to nullptr in that case instead of being left untouched.
+template <typename Torus>
+__host__ void scratch_cuda_integer_abs_kb(
+    cudaStream_t const *streams, uint32_t const *gpu_indexes,
+    uint32_t gpu_count, int_abs_buffer<Torus> **mem_ptr, bool is_signed,
+    uint32_t num_blocks, int_radix_params params, bool allocate_gpu_memory) {
+
+  if (is_signed) {
+    *mem_ptr =
+        new int_abs_buffer<Torus>(streams, gpu_indexes, gpu_count, params,
+                                  num_blocks, allocate_gpu_memory);
+  } else {
+    *mem_ptr = nullptr;
+  }
+}
+
+// Computes the absolute value of the signed radix ciphertext `ct` in place.
+// No-op when is_signed is false.
+//
+// With n the number of message bits in the ciphertext, the steps are:
+//   mask = ct >> (n - 1)   (arithmetic shift)
+//   ct   = ct + mask       (block-wise addition, then carry propagation)
+//   ct   = ct ^ mask
+template <typename Torus>
+__host__ void
+host_integer_abs_kb(cudaStream_t const *streams, uint32_t const *gpu_indexes,
+                    uint32_t gpu_count, Torus *ct, void *const *bsks,
+                    Torus *const *ksks, int_abs_buffer<Torus> *mem_ptr,
+                    bool is_signed, uint32_t num_blocks) {
+  if (!is_signed)
+    return;
+
+  auto radix_params = mem_ptr->params;
+  auto mask = mem_ptr->mask;
+
+  auto big_lwe_dimension = radix_params.big_lwe_dimension;
+  auto big_lwe_size = big_lwe_dimension + 1;
+  auto big_lwe_size_bytes = big_lwe_size * sizeof(Torus);
+  // 31 - clz(x) is floor(log2(x)) for a non-zero 32-bit x, i.e. the number of
+  // message bits per block when message_modulus is a power of two.
+  uint32_t num_bits_in_ciphertext =
+      (31 - __builtin_clz(radix_params.message_modulus)) * num_blocks;
+
+  cuda_memcpy_async_gpu_to_gpu(mask, ct, num_blocks * big_lwe_size_bytes,
+                               streams[0], gpu_indexes[0]);
+
+  host_integer_radix_arithmetic_scalar_shift_kb_inplace(
+      streams, gpu_indexes, gpu_count, mask, num_bits_in_ciphertext - 1,
+      mem_ptr->arithmetic_scalar_shift_mem, bsks, ksks, num_blocks);
+  host_addition<Torus>(streams[0], gpu_indexes[0], ct, mask, ct,
+                       radix_params.big_lwe_dimension, num_blocks);
+
+  host_propagate_single_carry<Torus>(streams, gpu_indexes, gpu_count, ct,
+                                     nullptr, nullptr, mem_ptr->scp_mem, bsks,
+                                     ksks, num_blocks);
+
+  host_integer_radix_bitop_kb(streams, gpu_indexes, gpu_count, ct, mask, ct,
+                              mem_ptr->bitxor_mem, bsks, ksks, num_blocks);
+}
+
+#endif // TFHE_RS_ABS_CUH
diff --git a/backends/tfhe-cuda-backend/src/bindings.rs b/backends/tfhe-cuda-backend/src/bindings.rs
index 2a665498f3..feadbe676d 100644
--- a/backends/tfhe-cuda-backend/src/bindings.rs
+++ b/backends/tfhe-cuda-backend/src/bindings.rs
@@ -1036,6 +1036,50 @@ extern "C" {
         lwe_size: u32,
     );
 }
+extern "C" {
+    pub fn scratch_cuda_integer_abs_inplace_radix_ciphertext_kb_64(
+        streams: *const *mut ffi::c_void,
+        gpu_indexes: *const u32,
+        gpu_count: u32,
+        mem_ptr: *mut *mut i8,
+        is_signed: bool,
+        glwe_dimension: u32,
+        polynomial_size: u32,
+        big_lwe_dimension: u32,
+        small_lwe_dimension: u32,
+        ks_level: u32,
+        ks_base_log: u32,
+        pbs_level: u32,
+        pbs_base_log: u32,
+        grouping_factor: u32,
+        num_blocks: u32,
+        message_modulus: u32,
+        carry_modulus: u32,
+        pbs_type: PBS_TYPE,
+        allocate_gpu_memory: bool,
+    );
+}
+extern "C" {
+    pub fn cuda_integer_abs_inplace_radix_ciphertext_kb_64(
+        streams: *const *mut ffi::c_void,
+        gpu_indexes: *const u32,
+        gpu_count: u32,
+        ct: *mut ffi::c_void,
+        mem_ptr: *mut i8,
+        is_signed: bool,
+        bsks: *const *mut ffi::c_void,
+        ksks: *const *mut ffi::c_void,
+        num_blocks: u32,
+    );
+}
+extern "C" {
+    pub fn cleanup_cuda_integer_abs_inplace(
+        streams: *const *mut ffi::c_void,
+        gpu_indexes: *const u32,
+        gpu_count: u32,
+        mem_ptr_void: *mut *mut i8,
+    );
+}
 extern "C" {
     pub fn cuda_keyswitch_lwe_ciphertext_vector_32(
         stream: *mut ffi::c_void,
diff --git a/tfhe/benches/integer/signed_bench.rs b/tfhe/benches/integer/signed_bench.rs
index ec7bcbdbc8..3638c8c65e 100644
--- a/tfhe/benches/integer/signed_bench.rs
+++ b/tfhe/benches/integer/signed_bench.rs
@@ -1602,6 +1602,10 @@ mod cuda {
         display_name: neg
     );
 
+    define_cuda_server_key_bench_clean_input_signed_unary_fn!(
+        method_name: unchecked_abs,
+        display_name: abs
+    );
     define_cuda_server_key_bench_clean_input_signed_fn!(
         method_name: unchecked_mul,
         display_name: mul
@@ -1842,6 +1846,11 @@ mod cuda {
         display_name: neg
     );
 
+    define_cuda_server_key_bench_clean_input_signed_unary_fn!(
+        method_name: abs,
+        display_name: abs
+    );
+
     define_cuda_server_key_bench_clean_input_signed_fn!(
         method_name: mul,
         display_name: mul
@@ -2056,6 +2065,7 @@ mod cuda {
         cuda_unchecked_add,
         cuda_unchecked_sub,
         cuda_unchecked_neg,
+        cuda_unchecked_abs,
         cuda_unchecked_mul,
         cuda_unchecked_bitand,
         cuda_unchecked_bitnot,
@@ -2105,6 +2115,7 @@ mod cuda {
         cuda_add,
         cuda_sub,
         cuda_neg,
+        cuda_abs,
         cuda_mul,
         cuda_bitand,
         cuda_bitnot,
diff --git a/tfhe/src/integer/gpu/mod.rs b/tfhe/src/integer/gpu/mod.rs
index 0e40674f4c..871332beea 100644
--- a/tfhe/src/integer/gpu/mod.rs
+++ b/tfhe/src/integer/gpu/mod.rs
@@ -2770,3 +2770,80 @@ pub unsafe fn reverse_blocks_inplace_async<T: UnsignedInteger>(
         );
     }
 }
+
+/// In-place absolute value of a signed radix ciphertext on the GPU.
+///
+/// Creates the backend scratch buffer, runs the abs operation on `ct` with it, then calls the
+/// backend cleanup routine for that buffer.
+///
+/// # Safety
+///
+/// - [CudaStreams::synchronize] __must__ be called after this function as soon as synchronization
+///   is required
+#[allow(clippy::too_many_arguments)]
+pub unsafe fn unchecked_signed_abs_radix_kb_assign_async<T: UnsignedInteger, B: Numeric>(
+    streams: &CudaStreams,
+    ct: &mut CudaVec<T>,
+    bootstrapping_key: &CudaVec<B>,
+    keyswitch_key: &CudaVec<T>,
+    message_modulus: MessageModulus,
+    carry_modulus: CarryModulus,
+    glwe_dimension: GlweDimension,
+    polynomial_size: PolynomialSize,
+    big_lwe_dimension: LweDimension,
+    small_lwe_dimension: LweDimension,
+    ks_level: DecompositionLevelCount,
+    ks_base_log: DecompositionBaseLog,
+    pbs_level: DecompositionLevelCount,
+    pbs_base_log: DecompositionBaseLog,
+    num_blocks: u32,
+    pbs_type: PBSType,
+    grouping_factor: LweBskGroupingFactor,
+) {
+    let streams_ptr = streams.ptr.as_ptr();
+    let gpu_indexes_ptr = streams.gpu_indexes.as_ptr();
+    let gpu_count = streams.len() as u32;
+    // This entry point is only meant for signed ciphertexts.
+    let is_signed = true;
+    let allocate_gpu_memory = true;
+
+    let mut scratch: *mut i8 = std::ptr::null_mut();
+    scratch_cuda_integer_abs_inplace_radix_ciphertext_kb_64(
+        streams_ptr,
+        gpu_indexes_ptr,
+        gpu_count,
+        std::ptr::addr_of_mut!(scratch),
+        is_signed,
+        glwe_dimension.0 as u32,
+        polynomial_size.0 as u32,
+        big_lwe_dimension.0 as u32,
+        small_lwe_dimension.0 as u32,
+        ks_level.0 as u32,
+        ks_base_log.0 as u32,
+        pbs_level.0 as u32,
+        pbs_base_log.0 as u32,
+        grouping_factor.0 as u32,
+        num_blocks,
+        message_modulus.0 as u32,
+        carry_modulus.0 as u32,
+        pbs_type as u32,
+        allocate_gpu_memory,
+    );
+    cuda_integer_abs_inplace_radix_ciphertext_kb_64(
+        streams_ptr,
+        gpu_indexes_ptr,
+        gpu_count,
+        ct.as_mut_c_ptr(0),
+        scratch,
+        is_signed,
+        bootstrapping_key.ptr.as_ptr(),
+        keyswitch_key.ptr.as_ptr(),
+        num_blocks,
+    );
+    cleanup_cuda_integer_abs_inplace(
+        streams_ptr,
+        gpu_indexes_ptr,
+        gpu_count,
+        std::ptr::addr_of_mut!(scratch),
+    );
+}
diff --git a/tfhe/src/integer/gpu/server_key/radix/abs.rs b/tfhe/src/integer/gpu/server_key/radix/abs.rs
new file mode 100644
index 0000000000..1255691f11
--- /dev/null
+++ b/tfhe/src/integer/gpu/server_key/radix/abs.rs
@@ -0,0 +1,153 @@
+use crate::core_crypto::gpu::CudaStreams;
+use crate::core_crypto::prelude::LweBskGroupingFactor;
+use crate::integer::gpu::ciphertext::CudaIntegerRadixCiphertext;
+use crate::integer::gpu::server_key::{CudaBootstrappingKey, CudaServerKey};
+use crate::integer::gpu::{unchecked_signed_abs_radix_kb_assign_async, PBSType};
+
+impl CudaServerKey {
+    /// Replaces `ct` with its absolute value without checking or clearing the block carries.
+    ///
+    /// Unsigned ciphertexts (`T::IS_SIGNED == false`) are left untouched.
+    ///
+    /// # Safety
+    ///
+    /// - [CudaStreams::synchronize] __must__ be called after this function as soon as
+    ///   synchronization is required
+    pub unsafe fn unchecked_abs_assign_async<T>(&self, ct: &mut T, streams: &CudaStreams)
+    where
+        T: CudaIntegerRadixCiphertext,
+    {
+        // `unchecked_signed_abs_radix_kb_assign_async` always runs the signed algorithm, so
+        // unsigned ciphertexts must not reach it.
+        if !T::IS_SIGNED {
+            return;
+        }
+
+        let num_blocks = ct.as_ref().d_blocks.lwe_ciphertext_count().0 as u32;
+
+        // The keyswitch parameters do not depend on the kind of bootstrapping key.
+        let ksk = &self.key_switching_key;
+        let big_lwe_dimension = ksk.input_key_lwe_size().to_lwe_dimension();
+        let small_lwe_dimension = ksk.output_key_lwe_size().to_lwe_dimension();
+        let ks_level = ksk.decomposition_level_count();
+        let ks_base_log = ksk.decomposition_base_log();
+
+        match &self.bootstrapping_key {
+            CudaBootstrappingKey::Classic(d_bsk) => {
+                unchecked_signed_abs_radix_kb_assign_async(
+                    streams,
+                    &mut ct.as_mut().d_blocks.0.d_vec,
+                    &d_bsk.d_vec,
+                    &ksk.d_vec,
+                    self.message_modulus,
+                    self.carry_modulus,
+                    d_bsk.glwe_dimension,
+                    d_bsk.polynomial_size,
+                    big_lwe_dimension,
+                    small_lwe_dimension,
+                    ks_level,
+                    ks_base_log,
+                    d_bsk.decomp_level_count,
+                    d_bsk.decomp_base_log,
+                    num_blocks,
+                    PBSType::Classical,
+                    // The grouping factor is only used by the multi-bit PBS.
+                    LweBskGroupingFactor(0),
+                );
+            }
+            CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
+                unchecked_signed_abs_radix_kb_assign_async(
+                    streams,
+                    &mut ct.as_mut().d_blocks.0.d_vec,
+                    &d_multibit_bsk.d_vec,
+                    &ksk.d_vec,
+                    self.message_modulus,
+                    self.carry_modulus,
+                    d_multibit_bsk.glwe_dimension,
+                    d_multibit_bsk.polynomial_size,
+                    big_lwe_dimension,
+                    small_lwe_dimension,
+                    ks_level,
+                    ks_base_log,
+                    d_multibit_bsk.decomp_level_count,
+                    d_multibit_bsk.decomp_base_log,
+                    num_blocks,
+                    PBSType::MultiBit,
+                    d_multibit_bsk.grouping_factor,
+                );
+            }
+        }
+    }
+
+    /// Returns the absolute value of `ct` without checking or clearing the block carries.
+    ///
+    /// For unsigned ciphertexts the result is a copy of `ct`.
+    pub fn unchecked_abs<T>(&self, ct: &T, streams: &CudaStreams) -> T
+    where
+        T: CudaIntegerRadixCiphertext,
+    {
+        // SAFETY: the streams are synchronized below, before `res` is handed to the caller.
+        let mut res = unsafe { ct.duplicate_async(streams) };
+        unsafe { self.unchecked_abs_assign_async(&mut res, streams) };
+        streams.synchronize();
+        res
+    }
+
+    /// Computes homomorphically the absolute value of a ciphertext encrypting an integer value.
+    ///
+    /// This function, like all "default" operations (i.e. not smart, checked or unchecked), will
+    /// check that the input ciphertext block carries are empty and clears them if it's not the
+    /// case and the operation requires it. It outputs a ciphertext whose block carries are always
+    /// empty.
+    ///
+    /// # Warning
+    ///
+    /// - Multithreaded
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// use tfhe::core_crypto::gpu::CudaStreams;
+    /// use tfhe::integer::gpu::ciphertext::CudaSignedRadixCiphertext;
+    /// use tfhe::integer::gpu::gen_keys_radix_gpu;
+    /// use tfhe::shortint::parameters::PARAM_GPU_MULTI_BIT_GROUP_3_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64;
+    ///
+    /// let gpu_index = 0;
+    /// let streams = CudaStreams::new_single_gpu(gpu_index);
+    ///
+    /// // Generate the client key and the server key:
+    /// let num_blocks = 4;
+    /// let (cks, sks) = gen_keys_radix_gpu(PARAM_GPU_MULTI_BIT_GROUP_3_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64, num_blocks, &streams);
+    ///
+    /// let msg = -14i32;
+    ///
+    /// let ct = cks.encrypt_signed(msg);
+    ///
+    /// // Copy to GPU
+    /// let d_ct = CudaSignedRadixCiphertext::from_signed_radix_ciphertext(&ct, &streams);
+    ///
+    /// // Compute homomorphically an absolute value:
+    /// let d_ct_res = sks.abs(&d_ct, &streams);
+    ///
+    /// let ct_res = d_ct_res.to_signed_radix_ciphertext(&streams);
+    ///
+    /// // Decrypt:
+    /// let dec_result: i32 = cks.decrypt_signed(&ct_res);
+    ///
+    /// let abs_msg = if msg < 0 { -msg } else { msg };
+    /// assert_eq!(dec_result, abs_msg);
+    /// ```
+    pub fn abs<T>(&self, ct: &T, streams: &CudaStreams) -> T
+    where
+        T: CudaIntegerRadixCiphertext,
+    {
+        // SAFETY: the streams are synchronized below, before `res` is handed to the caller.
+        let mut res = unsafe { ct.duplicate_async(streams) };
+        if !ct.block_carries_are_empty() {
+            unsafe { self.full_propagate_assign_async(&mut res, streams) };
+        }
+        unsafe { self.unchecked_abs_assign_async(&mut res, streams) };
+        streams.synchronize();
+        res
+    }
+}
diff --git a/tfhe/src/integer/gpu/server_key/radix/mod.rs b/tfhe/src/integer/gpu/server_key/radix/mod.rs
index 32daab51ce..f7571bf7d0 100644
--- a/tfhe/src/integer/gpu/server_key/radix/mod.rs
+++ b/tfhe/src/integer/gpu/server_key/radix/mod.rs
@@ -24,6 +24,7 @@ use crate::shortint::server_key::{
 };
 use crate::shortint::PBSOrder;
 
+mod abs;
 mod add;
 mod bitwise_op;
 mod cmux;
diff --git a/tfhe/src/integer/gpu/server_key/radix/tests_signed/mod.rs b/tfhe/src/integer/gpu/server_key/radix/tests_signed/mod.rs
index 5205a32842..e8fedd2848 100644
--- a/tfhe/src/integer/gpu/server_key/radix/tests_signed/mod.rs
+++ b/tfhe/src/integer/gpu/server_key/radix/tests_signed/mod.rs
@@ -1,3 +1,4 @@
+pub(crate) mod test_abs;
 pub(crate) mod test_add;
 pub(crate) mod test_bitwise_op;
 pub(crate) mod test_cmux;
diff --git a/tfhe/src/integer/gpu/server_key/radix/tests_signed/test_abs.rs b/tfhe/src/integer/gpu/server_key/radix/tests_signed/test_abs.rs
new file mode 100644
index 0000000000..8f2a97ab86
--- /dev/null
+++ b/tfhe/src/integer/gpu/server_key/radix/tests_signed/test_abs.rs
@@ -0,0 +1,30 @@
+use crate::integer::gpu::server_key::radix::tests_unsigned::{
+    create_gpu_parametrized_test, GpuFunctionExecutor,
+};
+use crate::integer::gpu::CudaServerKey;
+use crate::integer::server_key::radix_parallel::tests_signed::test_abs::{
+    signed_default_absolute_value_test, signed_unchecked_absolute_value_test,
+};
+use crate::shortint::parameters::*;
+
+create_gpu_parametrized_test!(integer_signed_unchecked_abs);
+create_gpu_parametrized_test!(integer_signed_abs);
+
+/// Runs `signed_unchecked_absolute_value_test` against `CudaServerKey::unchecked_abs`.
+fn integer_signed_unchecked_abs<P>(param: P)
+where
+    P: Into<PBSParameters>,
+{
+    signed_unchecked_absolute_value_test(
+        param,
+        GpuFunctionExecutor::new(&CudaServerKey::unchecked_abs),
+    );
+}
+
+/// Runs `signed_default_absolute_value_test` against `CudaServerKey::abs`.
+fn integer_signed_abs<P>(param: P)
+where
+    P: Into<PBSParameters>,
+{
+    signed_default_absolute_value_test(param, GpuFunctionExecutor::new(&CudaServerKey::abs));
+}