chore(gpu): use natural order for decomposition levels in bsk

zama-ai · Nov 4, 2024 · 2b56ca2 · 2b56ca2
1 parent c163189
commit 2b56ca2
Show file tree

Hide file tree

Showing 4 changed files with 14 additions and 49 deletions.
diff --git a/backends/tfhe-cuda-backend/cuda/src/pbs/bootstrapping_key.cuh b/backends/tfhe-cuda-backend/cuda/src/pbs/bootstrapping_key.cuh
@@ -24,8 +24,8 @@ __device__ const T *get_ith_mask_kth_block(const T *ptr, int i, int k,
                                            uint32_t level_count) {
   return &ptr[get_start_ith_ggsw(i, polynomial_size, glwe_dimension,
                                  level_count) +
-              level * polynomial_size / 2 * (glwe_dimension + 1) *
-                  (glwe_dimension + 1) +
+              (level_count - level - 1) * polynomial_size / 2 *
+                  (glwe_dimension + 1) * (glwe_dimension + 1) +
               k * polynomial_size / 2 * (glwe_dimension + 1)];
 }
 
@@ -35,8 +35,8 @@ __device__ T *get_ith_mask_kth_block(T *ptr, int i, int k, int level,
                                      int glwe_dimension, uint32_t level_count) {
   return &ptr[get_start_ith_ggsw(i, polynomial_size, glwe_dimension,
                                  level_count) +
-              level * polynomial_size / 2 * (glwe_dimension + 1) *
-                  (glwe_dimension + 1) +
+              (level_count - level - 1) * polynomial_size / 2 *
+                  (glwe_dimension + 1) * (glwe_dimension + 1) +
               k * polynomial_size / 2 * (glwe_dimension + 1)];
 }
 template <typename T>
@@ -45,8 +45,8 @@ __device__ T *get_ith_body_kth_block(T *ptr, int i, int k, int level,
                                      int glwe_dimension, uint32_t level_count) {
   return &ptr[get_start_ith_ggsw(i, polynomial_size, glwe_dimension,
                                  level_count) +
-              level * polynomial_size / 2 * (glwe_dimension + 1) *
-                  (glwe_dimension + 1) +
+              (level_count - level - 1) * polynomial_size / 2 *
+                  (glwe_dimension + 1) * (glwe_dimension + 1) +
               k * polynomial_size / 2 * (glwe_dimension + 1) +
               glwe_dimension * polynomial_size / 2];
 }

diff --git a/backends/tfhe-cuda-backend/cuda/src/pbs/programmable_bootstrap_amortized.cuh b/backends/tfhe-cuda-backend/cuda/src/pbs/programmable_bootstrap_amortized.cuh
@@ -137,7 +137,7 @@ __global__ void device_programmable_bootstrap_amortized(
     // Now that the rotation is done, decompose the resulting polynomial
     // coefficients so as to multiply each decomposed level with the
     // corresponding part of the bootstrapping key
-    for (int level = level_count - 1; level >= 0; level--) {
+    for (int level = 0; level < level_count; level++) {
       for (int i = 0; i < (glwe_dimension + 1); i++) {
         gadget.decompose_and_compress_next_polynomial(accumulator_fft, i);
 

diff --git a/tfhe/src/core_crypto/gpu/entities/lwe_bootstrap_key.rs b/tfhe/src/core_crypto/gpu/entities/lwe_bootstrap_key.rs
@@ -1,9 +1,8 @@
 use crate::core_crypto::gpu::vec::CudaVec;
 use crate::core_crypto::gpu::{convert_lwe_programmable_bootstrap_key_async, CudaStreams};
 use crate::core_crypto::prelude::{
-    lwe_bootstrap_key_size, Container, ContiguousEntityContainerMut, DecompositionBaseLog,
-    DecompositionLevelCount, GgswCiphertextList, GlweDimension, LweBootstrapKey, LweDimension,
-    PolynomialSize, UnsignedInteger,
+    lwe_bootstrap_key_size, Container, DecompositionBaseLog, DecompositionLevelCount,
+    GlweDimension, LweBootstrapKey, LweDimension, PolynomialSize, UnsignedInteger,
 };
 
 /// A structure representing a vector of GLWE ciphertexts with 64 bits of precision on the GPU.
@@ -49,29 +48,12 @@ impl CudaLweBootstrapKey {
             ),
             streams,
         );
-
-        // HACK: for now the GPU has a level order that is not consistent with the CPU, so we copy
-        // the key here and update the level order.
-        let mut bsk_as_ggsw_list = GgswCiphertextList::from_container(
-            bsk.as_ref().to_vec(),
-            bsk.glwe_size(),
-            bsk.polynomial_size(),
-            bsk.decomposition_base_log(),
-            bsk.decomposition_level_count(),
-            bsk.ciphertext_modulus(),
-        );
-
-        for mut ggsw in bsk_as_ggsw_list.iter_mut() {
-            // Invert level to match the expected order for GPU
-            ggsw.reverse();
-        }
-
         // Copy to the GPU
         unsafe {
             convert_lwe_programmable_bootstrap_key_async(
                 streams,
                 &mut d_vec,
-                bsk_as_ggsw_list.as_ref(),
+                bsk.as_ref(),
                 input_lwe_dimension,
                 glwe_dimension,
                 decomp_level_count,

diff --git a/tfhe/src/core_crypto/gpu/entities/lwe_multi_bit_bootstrap_key.rs b/tfhe/src/core_crypto/gpu/entities/lwe_multi_bit_bootstrap_key.rs
@@ -3,9 +3,9 @@ use crate::core_crypto::gpu::{
     convert_lwe_multi_bit_programmable_bootstrap_key_async, CudaStreams,
 };
 use crate::core_crypto::prelude::{
-    lwe_multi_bit_bootstrap_key_size, Container, ContiguousEntityContainerMut,
-    DecompositionBaseLog, DecompositionLevelCount, GgswCiphertextList, GlweDimension,
-    LweBskGroupingFactor, LweDimension, LweMultiBitBootstrapKey, PolynomialSize, UnsignedInteger,
+    lwe_multi_bit_bootstrap_key_size, Container, DecompositionBaseLog, DecompositionLevelCount,
+    GlweDimension, LweBskGroupingFactor, LweDimension, LweMultiBitBootstrapKey, PolynomialSize,
+    UnsignedInteger,
 };
 
 /// A structure representing a vector of GLWE ciphertexts with 64 bits of precision on the GPU.
@@ -54,29 +54,12 @@ impl CudaLweMultiBitBootstrapKey {
             .unwrap(),
             streams,
         );
-
-        // HACK: for now the GPU has a level order that is not consistent with the CPU, so we copy
-        // the key here and update the level order.
-        let mut bsk_as_ggsw_list = GgswCiphertextList::from_container(
-            bsk.as_ref().to_vec(),
-            bsk.glwe_size(),
-            bsk.polynomial_size(),
-            bsk.decomposition_base_log(),
-            bsk.decomposition_level_count(),
-            bsk.ciphertext_modulus(),
-        );
-
-        for mut ggsw in bsk_as_ggsw_list.iter_mut() {
-            // Invert level to match the expected order for GPU
-            ggsw.reverse();
-        }
-
         // Copy to the GPU
         unsafe {
             convert_lwe_multi_bit_programmable_bootstrap_key_async(
                 streams,
                 &mut d_vec,
-                bsk_as_ggsw_list.as_ref(),
+                bsk.as_ref(),
                 input_lwe_dimension,
                 glwe_dimension,
                 decomp_level_count,