diff --git a/tfhe/src/integer/gpu/server_key/radix/bitwise_op.rs b/tfhe/src/integer/gpu/server_key/radix/bitwise_op.rs index 7ac7968c8f..198b94df49 100644 --- a/tfhe/src/integer/gpu/server_key/radix/bitwise_op.rs +++ b/tfhe/src/integer/gpu/server_key/radix/bitwise_op.rs @@ -29,24 +29,24 @@ impl CudaServerKey { /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS; /// /// let gpu_index = 0; - /// let mut stream = CudaStreams::new_single_gpu(gpu_index); + /// let mut streams = CudaStreams::new_single_gpu(gpu_index); /// /// // We have 4 * 2 = 8 bits of message /// let size = 4; - /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut stream); + /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut streams); /// /// let msg = 1u64; /// /// let ct = cks.encrypt(msg); /// /// // Copy to GPU - /// let d_ct = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct, &mut stream); + /// let d_ct = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct, &mut streams); /// /// // Compute homomorphically a bitwise and: - /// let d_ct_res = sks.unchecked_bitnot(&d_ct, &mut stream); + /// let d_ct_res = sks.unchecked_bitnot(&d_ct, &mut streams); /// /// // Copy back to CPU - /// let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + /// let ct_res = d_ct_res.to_radix_ciphertext(&mut streams); /// /// // Decrypt: /// let dec: u64 = cks.decrypt(&ct_res); @@ -55,24 +55,24 @@ impl CudaServerKey { pub fn unchecked_bitnot( &self, ct: &T, - stream: &CudaStreams, + streams: &CudaStreams, ) -> T { - let mut result = unsafe { ct.duplicate_async(stream) }; - self.unchecked_bitnot_assign(&mut result, stream); + let mut result = unsafe { ct.duplicate_async(streams) }; + self.unchecked_bitnot_assign(&mut result, streams); result } /// # Safety /// - /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must - /// not be dropped until stream is synchronised + /// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until streams is synchronised pub unsafe fn unchecked_bitnot_assign_async( &self, ct: &mut T, - stream: &CudaStreams, + streams: &CudaStreams, ) { // We do (-ciphertext) + (msg_mod -1) as it allows to avoid an allocation - cuda_lwe_ciphertext_negate_assign(&mut ct.as_mut().d_blocks, stream); + cuda_lwe_ciphertext_negate_assign(&mut ct.as_mut().d_blocks, streams); let ct_blocks = ct.as_ref().d_blocks.lwe_ciphertext_count().0; @@ -81,14 +81,21 @@ impl CudaServerKey { let shift_plaintext = u64::from(scalar) * delta; let scalar_vector = vec![shift_plaintext; ct_blocks]; - let mut d_decomposed_scalar = - CudaVec::::new_async(ct.as_ref().d_blocks.lwe_ciphertext_count().0, stream, 0); - d_decomposed_scalar.copy_from_cpu_async(scalar_vector.as_slice(), stream, 0); + let mut d_decomposed_scalar = CudaVec::::new_async( + ct.as_ref().d_blocks.lwe_ciphertext_count().0, + streams, + streams.gpu_indexes[0], + ); + d_decomposed_scalar.copy_from_cpu_async( + scalar_vector.as_slice(), + streams, + streams.gpu_indexes[0], + ); cuda_lwe_ciphertext_plaintext_add_assign( &mut ct.as_mut().d_blocks, &d_decomposed_scalar, - stream, + streams, ); ct.as_mut().info = ct.as_ref().info.after_bitnot(); } @@ -96,12 +103,12 @@ impl CudaServerKey { pub fn unchecked_bitnot_assign( &self, ct: &mut T, - stream: &CudaStreams, + streams: &CudaStreams, ) { unsafe { - self.unchecked_bitnot_assign_async(ct, stream); + self.unchecked_bitnot_assign_async(ct, streams); } - 
stream.synchronize(); + streams.synchronize(); } /// Computes homomorphically bitand between two ciphertexts encrypting integer values. @@ -121,11 +128,11 @@ impl CudaServerKey { /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS; /// /// let gpu_index = 0; - /// let mut stream = CudaStreams::new_single_gpu(gpu_index); + /// let mut streams = CudaStreams::new_single_gpu(gpu_index); /// /// // We have 4 * 2 = 8 bits of message /// let size = 4; - /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut stream); + /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut streams); /// /// let msg1 = 201u64; /// let msg2 = 1u64; @@ -134,14 +141,14 @@ impl CudaServerKey { /// let ct2 = cks.encrypt(msg2); /// /// // Copy to GPU - /// let mut d_ct1 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct1, &mut stream); - /// let d_ct2 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct2, &mut stream); + /// let mut d_ct1 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct1, &mut streams); + /// let d_ct2 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct2, &mut streams); /// /// // Compute homomorphically a bitwise and: - /// let d_ct_res = sks.unchecked_bitand(&d_ct1, &d_ct2, &mut stream); + /// let d_ct_res = sks.unchecked_bitand(&d_ct1, &d_ct2, &mut streams); /// /// // Copy back to CPU - /// let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + /// let ct_res = d_ct_res.to_radix_ciphertext(&mut streams); /// /// // Decrypt: /// let dec: u64 = cks.decrypt(&ct_res); @@ -151,23 +158,23 @@ impl CudaServerKey { &self, ct_left: &T, ct_right: &T, - stream: &CudaStreams, + streams: &CudaStreams, ) -> T { - let mut result = unsafe { ct_left.duplicate_async(stream) }; - self.unchecked_bitand_assign(&mut result, ct_right, stream); + let mut result = unsafe { ct_left.duplicate_async(streams) }; + self.unchecked_bitand_assign(&mut result, ct_right, streams); result } /// # Safety /// - /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must - /// not be dropped until stream is synchronised + /// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until streams is synchronised pub unsafe fn unchecked_bitop_assign_async( &self, ct_left: &mut T, ct_right: &T, op: BitOpType, - stream: &CudaStreams, + streams: &CudaStreams, ) { assert_eq!( ct_left.as_ref().d_blocks.lwe_dimension(), @@ -183,7 +190,7 @@ impl CudaServerKey { match &self.bootstrapping_key { CudaBootstrappingKey::Classic(d_bsk) => { unchecked_bitop_integer_radix_kb_assign_async( - stream, + streams, &mut ct_left.as_mut().d_blocks.0.d_vec, &ct_right.as_ref().d_blocks.0.d_vec, &d_bsk.d_vec, @@ -210,7 +217,7 @@ impl CudaServerKey { } CudaBootstrappingKey::MultiBit(d_multibit_bsk) => { unchecked_bitop_integer_radix_kb_assign_async( - stream, + streams, &mut ct_left.as_mut().d_blocks.0.d_vec, &ct_right.as_ref().d_blocks.0.d_vec, &d_multibit_bsk.d_vec, @@ -242,13 +249,13 @@ impl CudaServerKey { &self, ct_left: &mut T, ct_right: &T, - stream: &CudaStreams, + streams: &CudaStreams, ) { unsafe { - self.unchecked_bitop_assign_async(ct_left, ct_right, BitOpType::And, stream); + self.unchecked_bitop_assign_async(ct_left, ct_right, BitOpType::And, streams); ct_left.as_mut().info = ct_left.as_ref().info.after_bitand(&ct_right.as_ref().info); } - stream.synchronize(); + streams.synchronize(); } /// Computes homomorphically bitor between two ciphertexts encrypting integer 
values. @@ -268,11 +275,11 @@ impl CudaServerKey { /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS; /// /// let gpu_index = 0; - /// let mut stream = CudaStreams::new_single_gpu(gpu_index); + /// let mut streams = CudaStreams::new_single_gpu(gpu_index); /// /// // We have 4 * 2 = 8 bits of message /// let size = 4; - /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut stream); + /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut streams); /// /// let msg1 = 200u64; /// let msg2 = 1u64; @@ -281,14 +288,14 @@ impl CudaServerKey { /// let ct2 = cks.encrypt(msg2); /// /// // Copy to GPU - /// let mut d_ct1 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct1, &mut stream); - /// let d_ct2 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct2, &mut stream); + /// let mut d_ct1 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct1, &mut streams); + /// let d_ct2 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct2, &mut streams); /// /// // Compute homomorphically a bitwise and: - /// let d_ct_res = sks.unchecked_bitor(&d_ct1, &d_ct2, &mut stream); + /// let d_ct_res = sks.unchecked_bitor(&d_ct1, &d_ct2, &mut streams); /// /// // Copy back to CPU - /// let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + /// let ct_res = d_ct_res.to_radix_ciphertext(&mut streams); /// /// // Decrypt: /// let dec: u64 = cks.decrypt(&ct_res); @@ -298,10 +305,10 @@ impl CudaServerKey { &self, ct_left: &T, ct_right: &T, - stream: &CudaStreams, + streams: &CudaStreams, ) -> T { - let mut result = unsafe { ct_left.duplicate_async(stream) }; - self.unchecked_bitor_assign(&mut result, ct_right, stream); + let mut result = unsafe { ct_left.duplicate_async(streams) }; + self.unchecked_bitor_assign(&mut result, ct_right, streams); result } @@ -309,13 +316,13 @@ impl CudaServerKey { &self, ct_left: &mut T, ct_right: &T, - stream: &CudaStreams, + streams: &CudaStreams, ) { unsafe { - self.unchecked_bitop_assign_async(ct_left, ct_right, BitOpType::Or, stream); + self.unchecked_bitop_assign_async(ct_left, ct_right, BitOpType::Or, streams); ct_left.as_mut().info = ct_left.as_ref().info.after_bitor(&ct_right.as_ref().info); } - stream.synchronize(); + streams.synchronize(); } /// Computes homomorphically bitxor between two ciphertexts encrypting integer values. 
@@ -335,11 +342,11 @@ impl CudaServerKey { /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS; /// /// let gpu_index = 0; - /// let mut stream = CudaStreams::new_single_gpu(gpu_index); + /// let mut streams = CudaStreams::new_single_gpu(gpu_index); /// /// // We have 4 * 2 = 8 bits of message /// let size = 4; - /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut stream); + /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut streams); /// /// let msg1 = 49; /// let msg2 = 64; @@ -348,14 +355,14 @@ impl CudaServerKey { /// let ct2 = cks.encrypt(msg2); /// /// // Copy to GPU - /// let mut d_ct1 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct1, &mut stream); - /// let d_ct2 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct2, &mut stream); + /// let mut d_ct1 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct1, &mut streams); + /// let d_ct2 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct2, &mut streams); /// /// // Compute homomorphically a bitwise and: - /// let d_ct_res = sks.unchecked_bitxor(&d_ct1, &d_ct2, &mut stream); + /// let d_ct_res = sks.unchecked_bitxor(&d_ct1, &d_ct2, &mut streams); /// /// // Copy back to CPU - /// let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + /// let ct_res = d_ct_res.to_radix_ciphertext(&mut streams); /// /// // Decrypt: /// let dec: u64 = cks.decrypt(&ct_res); @@ -365,10 +372,10 @@ impl CudaServerKey { &self, ct_left: &T, ct_right: &T, - stream: &CudaStreams, + streams: &CudaStreams, ) -> T { - let mut result = unsafe { ct_left.duplicate_async(stream) }; - self.unchecked_bitxor_assign(&mut result, ct_right, stream); + let mut result = unsafe { ct_left.duplicate_async(streams) }; + self.unchecked_bitxor_assign(&mut result, ct_right, streams); result } @@ -376,13 +383,13 @@ impl CudaServerKey { &self, ct_left: &mut T, ct_right: &T, - stream: &CudaStreams, + streams: &CudaStreams, ) { unsafe { - self.unchecked_bitop_assign_async(ct_left, ct_right, BitOpType::Xor, stream); + self.unchecked_bitop_assign_async(ct_left, ct_right, BitOpType::Xor, streams); ct_left.as_mut().info = ct_left.as_ref().info.after_bitxor(&ct_right.as_ref().info); } - stream.synchronize(); + streams.synchronize(); } /// Computes homomorphically bitand between two ciphertexts encrypting integer values. 
@@ -402,11 +409,11 @@ impl CudaServerKey { /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS; /// /// let gpu_index = 0; - /// let mut stream = CudaStreams::new_single_gpu(gpu_index); + /// let mut streams = CudaStreams::new_single_gpu(gpu_index); /// /// // We have 4 * 2 = 8 bits of message /// let size = 4; - /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut stream); + /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut streams); /// /// let msg1 = 201u64; /// let msg2 = 1u64; @@ -415,14 +422,14 @@ impl CudaServerKey { /// let ct2 = cks.encrypt(msg2); /// /// // Copy to GPU - /// let mut d_ct1 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct1, &mut stream); - /// let d_ct2 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct2, &mut stream); + /// let mut d_ct1 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct1, &mut streams); + /// let d_ct2 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct2, &mut streams); /// /// // Compute homomorphically a bitwise and: - /// let d_ct_res = sks.bitand(&d_ct1, &d_ct2, &mut stream); + /// let d_ct_res = sks.bitand(&d_ct1, &d_ct2, &mut streams); /// /// // Copy back to CPU - /// let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + /// let ct_res = d_ct_res.to_radix_ciphertext(&mut streams); /// /// // Decrypt: /// let dec: u64 = cks.decrypt(&ct_res); @@ -432,22 +439,22 @@ impl CudaServerKey { &self, ct_left: &T, ct_right: &T, - stream: &CudaStreams, + streams: &CudaStreams, ) -> T { - let mut result = unsafe { ct_left.duplicate_async(stream) }; - self.bitand_assign(&mut result, ct_right, stream); + let mut result = unsafe { ct_left.duplicate_async(streams) }; + self.bitand_assign(&mut result, ct_right, streams); result } /// # Safety /// - /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must - /// not be dropped until stream is synchronised + /// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until streams is synchronised pub unsafe fn bitand_assign_async( &self, ct_left: &mut T, ct_right: &T, - stream: &CudaStreams, + streams: &CudaStreams, ) { let mut tmp_rhs; @@ -458,36 +465,36 @@ impl CudaServerKey { ) { (true, true) => (ct_left, ct_right), (true, false) => { - tmp_rhs = ct_right.duplicate_async(stream); - self.full_propagate_assign_async(&mut tmp_rhs, stream); + tmp_rhs = ct_right.duplicate_async(streams); + self.full_propagate_assign_async(&mut tmp_rhs, streams); (ct_left, &tmp_rhs) } (false, true) => { - self.full_propagate_assign_async(ct_left, stream); + self.full_propagate_assign_async(ct_left, streams); (ct_left, ct_right) } (false, false) => { - tmp_rhs = ct_right.duplicate_async(stream); + tmp_rhs = ct_right.duplicate_async(streams); - self.full_propagate_assign_async(ct_left, stream); - self.full_propagate_assign_async(&mut tmp_rhs, stream); + self.full_propagate_assign_async(ct_left, streams); + self.full_propagate_assign_async(&mut tmp_rhs, streams); (ct_left, &tmp_rhs) } } }; - self.unchecked_bitop_assign_async(lhs, rhs, BitOpType::And, stream); + self.unchecked_bitop_assign_async(lhs, rhs, BitOpType::And, streams); } pub fn bitand_assign( &self, ct_left: &mut T, ct_right: &T, - stream: &CudaStreams, + streams: &CudaStreams, ) { unsafe { - self.bitand_assign_async(ct_left, ct_right, stream); + self.bitand_assign_async(ct_left, ct_right, streams); } - stream.synchronize(); + streams.synchronize(); } /// Computes 
homomorphically bitor between two ciphertexts encrypting integer values. @@ -507,11 +514,11 @@ impl CudaServerKey { /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS; /// /// let gpu_index = 0; - /// let mut stream = CudaStreams::new_single_gpu(gpu_index); + /// let mut streams = CudaStreams::new_single_gpu(gpu_index); /// /// // We have 4 * 2 = 8 bits of message /// let size = 4; - /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut stream); + /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut streams); /// /// let msg1 = 201u64; /// let msg2 = 1u64; @@ -520,14 +527,14 @@ impl CudaServerKey { /// let ct2 = cks.encrypt(msg2); /// /// // Copy to GPU - /// let mut d_ct1 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct1, &mut stream); - /// let d_ct2 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct2, &mut stream); + /// let mut d_ct1 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct1, &mut streams); + /// let d_ct2 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct2, &mut streams); /// /// // Compute homomorphically a bitwise and: - /// let d_ct_res = sks.bitor(&d_ct1, &d_ct2, &mut stream); + /// let d_ct_res = sks.bitor(&d_ct1, &d_ct2, &mut streams); /// /// // Copy back to CPU - /// let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + /// let ct_res = d_ct_res.to_radix_ciphertext(&mut streams); /// /// // Decrypt: /// let dec: u64 = cks.decrypt(&ct_res); @@ -537,22 +544,22 @@ impl CudaServerKey { &self, ct_left: &T, ct_right: &T, - stream: &CudaStreams, + streams: &CudaStreams, ) -> T { - let mut result = unsafe { ct_left.duplicate_async(stream) }; - self.bitor_assign(&mut result, ct_right, stream); + let mut result = unsafe { ct_left.duplicate_async(streams) }; + self.bitor_assign(&mut result, ct_right, streams); result } /// # Safety /// - /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must - /// not be dropped until stream is synchronised + /// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until streams is synchronised pub unsafe fn bitor_assign_async( &self, ct_left: &mut T, ct_right: &T, - stream: &CudaStreams, + streams: &CudaStreams, ) { let mut tmp_rhs; @@ -562,36 +569,36 @@ impl CudaServerKey { ) { (true, true) => (ct_left, ct_right), (true, false) => { - tmp_rhs = ct_right.duplicate_async(stream); - self.full_propagate_assign_async(&mut tmp_rhs, stream); + tmp_rhs = ct_right.duplicate_async(streams); + self.full_propagate_assign_async(&mut tmp_rhs, streams); (ct_left, &tmp_rhs) } (false, true) => { - self.full_propagate_assign_async(ct_left, stream); + self.full_propagate_assign_async(ct_left, streams); (ct_left, ct_right) } (false, false) => { - tmp_rhs = ct_right.duplicate_async(stream); + tmp_rhs = ct_right.duplicate_async(streams); - self.full_propagate_assign_async(ct_left, stream); - self.full_propagate_assign_async(&mut tmp_rhs, stream); + self.full_propagate_assign_async(ct_left, streams); + self.full_propagate_assign_async(&mut tmp_rhs, streams); (ct_left, &tmp_rhs) } }; - self.unchecked_bitop_assign_async(lhs, rhs, BitOpType::Or, stream); + self.unchecked_bitop_assign_async(lhs, rhs, BitOpType::Or, streams); } pub fn bitor_assign( &self, ct_left: &mut T, ct_right: &T, - stream: &CudaStreams, + streams: &CudaStreams, ) { unsafe { - self.bitor_assign_async(ct_left, ct_right, stream); + self.bitor_assign_async(ct_left, ct_right, streams); } - 
stream.synchronize(); + streams.synchronize(); } /// Computes homomorphically bitxor between two ciphertexts encrypting integer values. @@ -611,11 +618,11 @@ impl CudaServerKey { /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS; /// /// let gpu_index = 0; - /// let mut stream = CudaStreams::new_single_gpu(gpu_index); + /// let mut streams = CudaStreams::new_single_gpu(gpu_index); /// /// // We have 4 * 2 = 8 bits of message /// let size = 4; - /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut stream); + /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut streams); /// /// let msg1 = 201u64; /// let msg2 = 1u64; @@ -624,14 +631,14 @@ impl CudaServerKey { /// let ct2 = cks.encrypt(msg2); /// /// // Copy to GPU - /// let mut d_ct1 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct1, &mut stream); - /// let d_ct2 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct2, &mut stream); + /// let mut d_ct1 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct1, &mut streams); + /// let d_ct2 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct2, &mut streams); /// /// // Compute homomorphically a bitwise and: - /// let d_ct_res = sks.bitxor(&d_ct1, &d_ct2, &mut stream); + /// let d_ct_res = sks.bitxor(&d_ct1, &d_ct2, &mut streams); /// /// // Copy back to CPU - /// let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + /// let ct_res = d_ct_res.to_radix_ciphertext(&mut streams); /// /// // Decrypt: /// let dec: u64 = cks.decrypt(&ct_res); @@ -641,22 +648,22 @@ impl CudaServerKey { &self, ct_left: &T, ct_right: &T, - stream: &CudaStreams, + streams: &CudaStreams, ) -> T { - let mut result = unsafe { ct_left.duplicate_async(stream) }; - self.bitxor_assign(&mut result, ct_right, stream); + let mut result = unsafe { ct_left.duplicate_async(streams) }; + self.bitxor_assign(&mut result, ct_right, streams); result } /// # Safety /// - /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must - /// not be dropped until stream is synchronised + /// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until streams is synchronised pub unsafe fn bitxor_assign_async( &self, ct_left: &mut T, ct_right: &T, - stream: &CudaStreams, + streams: &CudaStreams, ) { let mut tmp_rhs; @@ -666,36 +673,36 @@ impl CudaServerKey { ) { (true, true) => (ct_left, ct_right), (true, false) => { - tmp_rhs = ct_right.duplicate_async(stream); - self.full_propagate_assign_async(&mut tmp_rhs, stream); + tmp_rhs = ct_right.duplicate_async(streams); + self.full_propagate_assign_async(&mut tmp_rhs, streams); (ct_left, &tmp_rhs) } (false, true) => { - self.full_propagate_assign_async(ct_left, stream); + self.full_propagate_assign_async(ct_left, streams); (ct_left, ct_right) } (false, false) => { - tmp_rhs = ct_right.duplicate_async(stream); + tmp_rhs = ct_right.duplicate_async(streams); - self.full_propagate_assign_async(ct_left, stream); - self.full_propagate_assign_async(&mut tmp_rhs, stream); + self.full_propagate_assign_async(ct_left, streams); + self.full_propagate_assign_async(&mut tmp_rhs, streams); (ct_left, &tmp_rhs) } }; - self.unchecked_bitop_assign_async(lhs, rhs, BitOpType::Xor, stream); + self.unchecked_bitop_assign_async(lhs, rhs, BitOpType::Xor, streams); } pub fn bitxor_assign( &self, ct_left: &mut T, ct_right: &T, - stream: &CudaStreams, + streams: &CudaStreams, ) { unsafe { - self.bitxor_assign_async(ct_left, ct_right, 
stream); + self.bitxor_assign_async(ct_left, ct_right, streams); } - stream.synchronize(); + streams.synchronize(); } /// Computes homomorphically bitnot for an encrypted integer value. @@ -716,55 +723,55 @@ impl CudaServerKey { /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS; /// /// let gpu_index = 0; - /// let mut stream = CudaStreams::new_single_gpu(gpu_index); + /// let mut streams = CudaStreams::new_single_gpu(gpu_index); /// /// // We have 4 * 2 = 8 bits of message /// let size = 4; - /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut stream); + /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut streams); /// /// let msg = 1u64; /// /// let ct = cks.encrypt(msg); /// /// // Copy to GPU - /// let d_ct = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct, &mut stream); + /// let d_ct = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct, &mut streams); /// /// // Compute homomorphically a bitwise and: - /// let d_ct_res = sks.bitnot(&d_ct, &mut stream); + /// let d_ct_res = sks.bitnot(&d_ct, &mut streams); /// /// // Copy back to CPU - /// let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + /// let ct_res = d_ct_res.to_radix_ciphertext(&mut streams); /// /// // Decrypt: /// let dec: u64 = cks.decrypt(&ct_res); /// assert_eq!(dec, !msg % 256); /// ``` - pub fn bitnot(&self, ct: &T, stream: &CudaStreams) -> T { - let mut result = unsafe { ct.duplicate_async(stream) }; - self.bitnot_assign(&mut result, stream); + pub fn bitnot(&self, ct: &T, streams: &CudaStreams) -> T { + let mut result = unsafe { ct.duplicate_async(streams) }; + self.bitnot_assign(&mut result, streams); result } /// # Safety /// - /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must - /// not be dropped until stream is synchronised + /// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until streams is synchronised pub unsafe fn bitnot_assign_async( &self, ct: &mut T, - stream: &CudaStreams, + streams: &CudaStreams, ) { if !ct.block_carries_are_empty() { - self.full_propagate_assign_async(ct, stream); + self.full_propagate_assign_async(ct, streams); } - self.unchecked_bitnot_assign_async(ct, stream); + self.unchecked_bitnot_assign_async(ct, streams); } - pub fn bitnot_assign(&self, ct: &mut T, stream: &CudaStreams) { + pub fn bitnot_assign(&self, ct: &mut T, streams: &CudaStreams) { unsafe { - self.bitnot_assign_async(ct, stream); + self.bitnot_assign_async(ct, streams); } - stream.synchronize(); + streams.synchronize(); } } diff --git a/tfhe/src/integer/gpu/server_key/radix/mod.rs b/tfhe/src/integer/gpu/server_key/radix/mod.rs index fcd1f52848..bf7c84b705 100644 --- a/tfhe/src/integer/gpu/server_key/radix/mod.rs +++ b/tfhe/src/integer/gpu/server_key/radix/mod.rs @@ -327,25 +327,26 @@ impl CudaServerKey { &self, ct: &T, num_blocks: usize, - stream: &CudaStreams, + streams: &CudaStreams, ) -> T { let new_num_blocks = ct.as_ref().d_blocks.lwe_ciphertext_count().0 + num_blocks; let ciphertext_modulus = ct.as_ref().d_blocks.ciphertext_modulus(); let lwe_size = ct.as_ref().d_blocks.lwe_dimension().to_lwe_size(); let shift = num_blocks * lwe_size.0; - let mut extended_ct_vec = - unsafe { CudaVec::new_async(new_num_blocks * lwe_size.0, stream, 0) }; + let mut extended_ct_vec = unsafe { + CudaVec::new_async(new_num_blocks * lwe_size.0, streams, streams.gpu_indexes[0]) + }; unsafe { - extended_ct_vec.memset_async(0u64, stream, 
0); + extended_ct_vec.memset_async(0u64, streams, streams.gpu_indexes[0]); extended_ct_vec.copy_self_range_gpu_to_gpu_async( shift.., &ct.as_ref().d_blocks.0.d_vec, - stream, + streams, 0, ); } - stream.synchronize(); + streams.synchronize(); let extended_ct_list = CudaLweCiphertextList::from_cuda_vec( extended_ct_vec, LweCiphertextCount(new_num_blocks), @@ -398,19 +399,24 @@ impl CudaServerKey { &self, ct: &T, num_blocks: usize, - stream: &CudaStreams, + streams: &CudaStreams, ) -> T { let new_num_blocks = ct.as_ref().d_blocks.lwe_ciphertext_count().0 + num_blocks; let ciphertext_modulus = ct.as_ref().d_blocks.ciphertext_modulus(); let lwe_size = ct.as_ref().d_blocks.lwe_dimension().to_lwe_size(); - let mut extended_ct_vec = - unsafe { CudaVec::new_async(new_num_blocks * lwe_size.0, stream, 0) }; + let mut extended_ct_vec = unsafe { + CudaVec::new_async(new_num_blocks * lwe_size.0, streams, streams.gpu_indexes[0]) + }; unsafe { - extended_ct_vec.memset_async(0u64, stream, 0); - extended_ct_vec.copy_from_gpu_async(&ct.as_ref().d_blocks.0.d_vec, stream, 0); + extended_ct_vec.memset_async(0u64, streams, streams.gpu_indexes[0]); + extended_ct_vec.copy_from_gpu_async( + &ct.as_ref().d_blocks.0.d_vec, + streams, + streams.gpu_indexes[0], + ); } - stream.synchronize(); + streams.synchronize(); let extended_ct_list = CudaLweCiphertextList::from_cuda_vec( extended_ct_vec, LweCiphertextCount(new_num_blocks), @@ -463,24 +469,25 @@ impl CudaServerKey { &self, ct: &T, num_blocks: usize, - stream: &CudaStreams, + streams: &CudaStreams, ) -> T { let new_num_blocks = ct.as_ref().d_blocks.lwe_ciphertext_count().0 - num_blocks; let ciphertext_modulus = ct.as_ref().d_blocks.ciphertext_modulus(); let lwe_size = ct.as_ref().d_blocks.lwe_dimension().to_lwe_size(); let shift = num_blocks * lwe_size.0; - let mut trimmed_ct_vec = - unsafe { CudaVec::new_async(new_num_blocks * lwe_size.0, stream, 0) }; + let mut trimmed_ct_vec = unsafe { + CudaVec::new_async(new_num_blocks * lwe_size.0, streams, streams.gpu_indexes[0]) + }; unsafe { trimmed_ct_vec.copy_src_range_gpu_to_gpu_async( shift.., &ct.as_ref().d_blocks.0.d_vec, - stream, + streams, 0, ); } - stream.synchronize(); + streams.synchronize(); let trimmed_ct_list = CudaLweCiphertextList::from_cuda_vec( trimmed_ct_vec, LweCiphertextCount(new_num_blocks), @@ -530,24 +537,25 @@ impl CudaServerKey { &self, ct: &T, num_blocks: usize, - stream: &CudaStreams, + streams: &CudaStreams, ) -> T { let new_num_blocks = ct.as_ref().d_blocks.lwe_ciphertext_count().0 - num_blocks; let ciphertext_modulus = ct.as_ref().d_blocks.ciphertext_modulus(); let lwe_size = ct.as_ref().d_blocks.lwe_dimension().to_lwe_size(); let shift = new_num_blocks * lwe_size.0; - let mut trimmed_ct_vec = - unsafe { CudaVec::new_async(new_num_blocks * lwe_size.0, stream, 0) }; + let mut trimmed_ct_vec = unsafe { + CudaVec::new_async(new_num_blocks * lwe_size.0, streams, streams.gpu_indexes[0]) + }; unsafe { trimmed_ct_vec.copy_src_range_gpu_to_gpu_async( 0..shift, &ct.as_ref().d_blocks.0.d_vec, - stream, + streams, 0, ); } - stream.synchronize(); + streams.synchronize(); let trimmed_ct_list = CudaLweCiphertextList::from_cuda_vec( trimmed_ct_vec, LweCiphertextCount(new_num_blocks), @@ -594,7 +602,7 @@ impl CudaServerKey { &self, ct: &T, num_blocks: usize, - stream: &CudaStreams, + streams: &CudaStreams, ) -> T { let message_modulus = self.message_modulus.0 as u64; let num_bits_in_block = message_modulus.ilog2(); @@ -612,28 +620,40 @@ impl CudaServerKey { let lwe_size = 
ct.as_ref().d_blocks.0.lwe_dimension.to_lwe_size().0; // Allocate the necessary amount of memory - let mut output_radix = CudaVec::new(new_num_ct_blocks * lwe_size, stream, 0); + let mut output_radix = CudaVec::new( + new_num_ct_blocks * lwe_size, + streams, + streams.gpu_indexes[0], + ); unsafe { - output_radix.copy_from_gpu_async(&ct.as_ref().d_blocks.0.d_vec, stream, 0); + output_radix.copy_from_gpu_async( + &ct.as_ref().d_blocks.0.d_vec, + streams, + streams.gpu_indexes[0], + ); // Get the last ct block let last_block = ct .as_ref() .d_blocks .0 .d_vec - .as_slice(lwe_size * (num_ct_blocks - 1).., 0) + .as_slice(lwe_size * (num_ct_blocks - 1).., streams.gpu_indexes[0]) .unwrap(); let mut output_slice = output_radix - .as_mut_slice(lwe_size * num_ct_blocks..lwe_size * new_num_ct_blocks, 0) + .as_mut_slice( + lwe_size * num_ct_blocks..lwe_size * new_num_ct_blocks, + streams.gpu_indexes[0], + ) .unwrap(); - let (padding_block, new_blocks) = output_slice.split_at_mut(lwe_size, 0); + let (padding_block, new_blocks) = + output_slice.split_at_mut(lwe_size, streams.gpu_indexes[0]); let mut padding_block = padding_block.unwrap(); let mut new_blocks = new_blocks.unwrap(); match &self.bootstrapping_key { CudaBootstrappingKey::Classic(d_bsk) => { apply_univariate_lut_kb_async( - stream, + streams, &mut padding_block, &last_block, padding_block_creator_lut.acc.as_ref(), @@ -657,7 +677,7 @@ impl CudaServerKey { } CudaBootstrappingKey::MultiBit(d_multibit_bsk) => { apply_univariate_lut_kb_async( - stream, + streams, &mut padding_block, &last_block, padding_block_creator_lut.acc.as_ref(), @@ -682,12 +702,12 @@ impl CudaServerKey { } for i in 0..num_blocks - 1 { let mut output_block = new_blocks - .get_mut(lwe_size * i..lwe_size * (i + 1), 0) + .get_mut(lwe_size * i..lwe_size * (i + 1), streams.gpu_indexes[0]) .unwrap(); - output_block.copy_from_gpu_async(&padding_block, stream, 0); + output_block.copy_from_gpu_async(&padding_block, streams, streams.gpu_indexes[0]); } } - stream.synchronize(); + streams.synchronize(); let output_lwe_list = CudaLweCiphertextList(CudaLweList { d_vec: output_radix, lwe_ciphertext_count: LweCiphertextCount(new_num_ct_blocks), diff --git a/tfhe/src/integer/gpu/server_key/radix/scalar_bitwise_op.rs b/tfhe/src/integer/gpu/server_key/radix/scalar_bitwise_op.rs index 4211f860da..9afa8ba40c 100644 --- a/tfhe/src/integer/gpu/server_key/radix/scalar_bitwise_op.rs +++ b/tfhe/src/integer/gpu/server_key/radix/scalar_bitwise_op.rs @@ -11,14 +11,14 @@ use crate::integer::gpu::{ impl CudaServerKey { /// # Safety /// - /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must - /// not be dropped until stream is synchronised + /// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until streams is synchronised pub unsafe fn unchecked_scalar_bitop_assign_async( &self, ct: &mut T, rhs: Scalar, op: BitOpType, - stream: &CudaStreams, + streams: &CudaStreams, ) where Scalar: DecomposableInto, T: CudaIntegerRadixCiphertext, @@ -31,12 +31,13 @@ impl CudaServerKey { .map(|x| x as u64) .collect::>(); - let clear_blocks = CudaVec::from_cpu_async(&h_clear_blocks, stream, 0); + let clear_blocks = + CudaVec::from_cpu_async(&h_clear_blocks, streams, streams.gpu_indexes[0]); match &self.bootstrapping_key { CudaBootstrappingKey::Classic(d_bsk) => { unchecked_scalar_bitop_integer_radix_kb_assign_async( - stream, + streams, &mut ct.as_mut().d_blocks.0.d_vec, &clear_blocks, &d_bsk.d_vec, @@ -63,7 +64,7 @@ 
impl CudaServerKey { } CudaBootstrappingKey::MultiBit(d_multibit_bsk) => { unchecked_scalar_bitop_integer_radix_kb_assign_async( - stream, + streams, &mut ct.as_mut().d_blocks.0.d_vec, &clear_blocks, &d_multibit_bsk.d_vec, @@ -91,13 +92,18 @@ impl CudaServerKey { } } - pub fn unchecked_scalar_bitand(&self, ct: &T, rhs: Scalar, stream: &CudaStreams) -> T + pub fn unchecked_scalar_bitand( + &self, + ct: &T, + rhs: Scalar, + streams: &CudaStreams, + ) -> T where Scalar: DecomposableInto, T: CudaIntegerRadixCiphertext, { - let mut result = unsafe { ct.duplicate_async(stream) }; - self.unchecked_scalar_bitand_assign(&mut result, rhs, stream); + let mut result = unsafe { ct.duplicate_async(streams) }; + self.unchecked_scalar_bitand_assign(&mut result, rhs, streams); result } @@ -105,25 +111,25 @@ impl CudaServerKey { &self, ct: &mut T, rhs: Scalar, - stream: &CudaStreams, + streams: &CudaStreams, ) where Scalar: DecomposableInto, T: CudaIntegerRadixCiphertext, { unsafe { - self.unchecked_scalar_bitop_assign_async(ct, rhs, BitOpType::ScalarAnd, stream); + self.unchecked_scalar_bitop_assign_async(ct, rhs, BitOpType::ScalarAnd, streams); ct.as_mut().info = ct.as_ref().info.after_scalar_bitand(rhs); } - stream.synchronize(); + streams.synchronize(); } - pub fn unchecked_scalar_bitor(&self, ct: &T, rhs: Scalar, stream: &CudaStreams) -> T + pub fn unchecked_scalar_bitor(&self, ct: &T, rhs: Scalar, streams: &CudaStreams) -> T where Scalar: DecomposableInto, T: CudaIntegerRadixCiphertext, { - let mut result = unsafe { ct.duplicate_async(stream) }; - self.unchecked_scalar_bitor_assign(&mut result, rhs, stream); + let mut result = unsafe { ct.duplicate_async(streams) }; + self.unchecked_scalar_bitor_assign(&mut result, rhs, streams); result } @@ -131,25 +137,30 @@ impl CudaServerKey { &self, ct: &mut T, rhs: Scalar, - stream: &CudaStreams, + streams: &CudaStreams, ) where Scalar: DecomposableInto, T: CudaIntegerRadixCiphertext, { unsafe { - self.unchecked_scalar_bitop_assign_async(ct, rhs, BitOpType::ScalarOr, stream); + self.unchecked_scalar_bitop_assign_async(ct, rhs, BitOpType::ScalarOr, streams); ct.as_mut().info = ct.as_ref().info.after_scalar_bitor(rhs); } - stream.synchronize(); + streams.synchronize(); } - pub fn unchecked_scalar_bitxor(&self, ct: &T, rhs: Scalar, stream: &CudaStreams) -> T + pub fn unchecked_scalar_bitxor( + &self, + ct: &T, + rhs: Scalar, + streams: &CudaStreams, + ) -> T where Scalar: DecomposableInto, T: CudaIntegerRadixCiphertext, { - let mut result = unsafe { ct.duplicate_async(stream) }; - self.unchecked_scalar_bitxor_assign(&mut result, rhs, stream); + let mut result = unsafe { ct.duplicate_async(streams) }; + self.unchecked_scalar_bitxor_assign(&mut result, rhs, streams); result } @@ -157,138 +168,138 @@ impl CudaServerKey { &self, ct: &mut T, rhs: Scalar, - stream: &CudaStreams, + streams: &CudaStreams, ) where Scalar: DecomposableInto, T: CudaIntegerRadixCiphertext, { unsafe { - self.unchecked_scalar_bitop_assign_async(ct, rhs, BitOpType::ScalarXor, stream); + self.unchecked_scalar_bitop_assign_async(ct, rhs, BitOpType::ScalarXor, streams); ct.as_mut().info = ct.as_ref().info.after_scalar_bitxor(rhs); } - stream.synchronize(); + streams.synchronize(); } /// # Safety /// - /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must - /// not be dropped until stream is synchronised + /// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until streams is synchronised pub 
unsafe fn scalar_bitand_assign_async( &self, ct: &mut T, rhs: Scalar, - stream: &CudaStreams, + streams: &CudaStreams, ) where Scalar: DecomposableInto, T: CudaIntegerRadixCiphertext, { if !ct.block_carries_are_empty() { - self.full_propagate_assign_async(ct, stream); + self.full_propagate_assign_async(ct, streams); } - self.unchecked_scalar_bitop_assign_async(ct, rhs, BitOpType::ScalarAnd, stream); + self.unchecked_scalar_bitop_assign_async(ct, rhs, BitOpType::ScalarAnd, streams); ct.as_mut().info = ct.as_ref().info.after_scalar_bitand(rhs); } - pub fn scalar_bitand_assign(&self, ct: &mut T, rhs: Scalar, stream: &CudaStreams) + pub fn scalar_bitand_assign(&self, ct: &mut T, rhs: Scalar, streams: &CudaStreams) where Scalar: DecomposableInto, T: CudaIntegerRadixCiphertext, { unsafe { - self.scalar_bitand_assign_async(ct, rhs, stream); + self.scalar_bitand_assign_async(ct, rhs, streams); } - stream.synchronize(); + streams.synchronize(); } - pub fn scalar_bitand(&self, ct: &T, rhs: Scalar, stream: &CudaStreams) -> T + pub fn scalar_bitand(&self, ct: &T, rhs: Scalar, streams: &CudaStreams) -> T where Scalar: DecomposableInto, T: CudaIntegerRadixCiphertext, { - let mut result = unsafe { ct.duplicate_async(stream) }; - self.scalar_bitand_assign(&mut result, rhs, stream); + let mut result = unsafe { ct.duplicate_async(streams) }; + self.scalar_bitand_assign(&mut result, rhs, streams); result } /// # Safety /// - /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must - /// not be dropped until stream is synchronised + /// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until streams is synchronised pub unsafe fn scalar_bitor_assign_async( &self, ct: &mut T, rhs: Scalar, - stream: &CudaStreams, + streams: &CudaStreams, ) where Scalar: DecomposableInto, T: CudaIntegerRadixCiphertext, { if !ct.block_carries_are_empty() { - self.full_propagate_assign_async(ct, stream); + self.full_propagate_assign_async(ct, streams); } - self.unchecked_scalar_bitop_assign_async(ct, rhs, BitOpType::ScalarOr, stream); + self.unchecked_scalar_bitop_assign_async(ct, rhs, BitOpType::ScalarOr, streams); ct.as_mut().info = ct.as_ref().info.after_scalar_bitor(rhs); } - pub fn scalar_bitor_assign(&self, ct: &mut T, rhs: Scalar, stream: &CudaStreams) + pub fn scalar_bitor_assign(&self, ct: &mut T, rhs: Scalar, streams: &CudaStreams) where Scalar: DecomposableInto, T: CudaIntegerRadixCiphertext, { unsafe { - self.scalar_bitor_assign_async(ct, rhs, stream); + self.scalar_bitor_assign_async(ct, rhs, streams); } - stream.synchronize(); + streams.synchronize(); } - pub fn scalar_bitor(&self, ct: &T, rhs: Scalar, stream: &CudaStreams) -> T + pub fn scalar_bitor(&self, ct: &T, rhs: Scalar, streams: &CudaStreams) -> T where Scalar: DecomposableInto, T: CudaIntegerRadixCiphertext, { - let mut result = unsafe { ct.duplicate_async(stream) }; - self.scalar_bitor_assign(&mut result, rhs, stream); + let mut result = unsafe { ct.duplicate_async(streams) }; + self.scalar_bitor_assign(&mut result, rhs, streams); result } /// # Safety /// - /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must - /// not be dropped until stream is synchronised + /// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until streams is synchronised pub unsafe fn scalar_bitxor_assign_async( &self, ct: &mut T, rhs: Scalar, - stream: &CudaStreams, + streams: 
&CudaStreams, ) where Scalar: DecomposableInto, T: CudaIntegerRadixCiphertext, { if !ct.block_carries_are_empty() { - self.full_propagate_assign_async(ct, stream); + self.full_propagate_assign_async(ct, streams); } - self.unchecked_scalar_bitop_assign_async(ct, rhs, BitOpType::ScalarXor, stream); + self.unchecked_scalar_bitop_assign_async(ct, rhs, BitOpType::ScalarXor, streams); ct.as_mut().info = ct.as_ref().info.after_scalar_bitxor(rhs); } - pub fn scalar_bitxor_assign(&self, ct: &mut T, rhs: Scalar, stream: &CudaStreams) + pub fn scalar_bitxor_assign(&self, ct: &mut T, rhs: Scalar, streams: &CudaStreams) where Scalar: DecomposableInto, T: CudaIntegerRadixCiphertext, { unsafe { - self.scalar_bitxor_assign_async(ct, rhs, stream); + self.scalar_bitxor_assign_async(ct, rhs, streams); } - stream.synchronize(); + streams.synchronize(); } - pub fn scalar_bitxor(&self, ct: &T, rhs: Scalar, stream: &CudaStreams) -> T + pub fn scalar_bitxor(&self, ct: &T, rhs: Scalar, streams: &CudaStreams) -> T where Scalar: DecomposableInto, T: CudaIntegerRadixCiphertext, { - let mut result = unsafe { ct.duplicate_async(stream) }; - self.scalar_bitxor_assign(&mut result, rhs, stream); + let mut result = unsafe { ct.duplicate_async(streams) }; + self.scalar_bitxor_assign(&mut result, rhs, streams); result } } diff --git a/tfhe/src/integer/gpu/server_key/radix/scalar_comparison.rs b/tfhe/src/integer/gpu/server_key/radix/scalar_comparison.rs index e3cc0a5c7f..5b6639082f 100644 --- a/tfhe/src/integer/gpu/server_key/radix/scalar_comparison.rs +++ b/tfhe/src/integer/gpu/server_key/radix/scalar_comparison.rs @@ -102,15 +102,15 @@ impl CudaServerKey { /// # Safety /// - /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must - /// not be dropped until stream is synchronised + /// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until streams is synchronised pub unsafe fn unchecked_signed_and_unsigned_scalar_comparison_async( &self, ct: &T, scalar: Scalar, op: ComparisonType, signed_with_positive_scalar: bool, - stream: &CudaStreams, + streams: &CudaStreams, ) -> CudaBooleanBlock where Scalar: DecomposableInto, @@ -122,7 +122,7 @@ impl CudaServerKey { ComparisonType::GT | ComparisonType::GE | ComparisonType::NE => 1, _ => 0, }; - let ct_res: T = self.create_trivial_radix(value, 1, stream); + let ct_res: T = self.create_trivial_radix(value, 1, streams); return CudaBooleanBlock::from_cuda_radix_ciphertext(ct_res.into_inner()); } @@ -144,7 +144,7 @@ impl CudaServerKey { ComparisonType::LT | ComparisonType::LE | ComparisonType::NE => 1, _ => 0, }; - let ct_res: T = self.create_trivial_radix(value, 1, stream); + let ct_res: T = self.create_trivial_radix(value, 1, streams); return CudaBooleanBlock::from_cuda_radix_ciphertext(ct_res.into_inner()); } @@ -153,7 +153,8 @@ impl CudaServerKey { // as we will handle them separately. 
scalar_blocks.truncate(ct.as_ref().d_blocks.lwe_ciphertext_count().0); - let d_scalar_blocks: CudaVec = CudaVec::from_cpu_async(&scalar_blocks, stream, 0); + let d_scalar_blocks: CudaVec = + CudaVec::from_cpu_async(&scalar_blocks, streams, streams.gpu_indexes[0]); let lwe_ciphertext_count = ct.as_ref().d_blocks.lwe_ciphertext_count(); @@ -161,7 +162,7 @@ impl CudaServerKey { ct.as_ref().d_blocks.lwe_dimension(), LweCiphertextCount(1), CiphertextModulus::new_native(), - stream, + streams, ); let mut block_info = ct.as_ref().info.blocks[0]; block_info.degree = Degree::new(0); @@ -174,7 +175,7 @@ impl CudaServerKey { match &self.bootstrapping_key { CudaBootstrappingKey::Classic(d_bsk) => { unchecked_scalar_comparison_integer_radix_kb_async( - stream, + streams, &mut result.as_mut().ciphertext.d_blocks.0.d_vec, &ct.as_ref().d_blocks.0.d_vec, &d_scalar_blocks, @@ -204,7 +205,7 @@ impl CudaServerKey { } CudaBootstrappingKey::MultiBit(d_multibit_bsk) => { unchecked_scalar_comparison_integer_radix_kb_async( - stream, + streams, &mut result.as_mut().ciphertext.d_blocks.0.d_vec, &ct.as_ref().d_blocks.0.d_vec, &d_scalar_blocks, @@ -239,14 +240,14 @@ impl CudaServerKey { /// # Safety /// - /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must - /// not be dropped until stream is synchronised + /// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until streams is synchronised pub unsafe fn unchecked_scalar_comparison_async( &self, ct: &T, scalar: Scalar, op: ComparisonType, - stream: &CudaStreams, + streams: &CudaStreams, ) -> CudaBooleanBlock where Scalar: DecomposableInto, @@ -260,12 +261,12 @@ impl CudaServerKey { // Scalar is greater than the bounds, so ciphertext is smaller let result: T = match op { ComparisonType::LT | ComparisonType::LE => { - self.create_trivial_radix(1, num_blocks, stream) + self.create_trivial_radix(1, num_blocks, streams) } _ => self.create_trivial_radix( 0, ct.as_ref().d_blocks.lwe_ciphertext_count().0, - stream, + streams, ), }; return CudaBooleanBlock::from_cuda_radix_ciphertext(result.into_inner()); @@ -274,12 +275,12 @@ impl CudaServerKey { // Scalar is smaller than the bounds, so ciphertext is bigger let result: T = match op { ComparisonType::GT | ComparisonType::GE => { - self.create_trivial_radix(1, num_blocks, stream) + self.create_trivial_radix(1, num_blocks, streams) } _ => self.create_trivial_radix( 0, ct.as_ref().d_blocks.lwe_ciphertext_count().0, - stream, + streams, ), }; return CudaBooleanBlock::from_cuda_radix_ciphertext(result.into_inner()); @@ -292,29 +293,29 @@ impl CudaServerKey { if scalar >= Scalar::ZERO { self.unchecked_signed_and_unsigned_scalar_comparison_async( - ct, scalar, op, true, stream, + ct, scalar, op, true, streams, ) } else { - let scalar_as_trivial = self.create_trivial_radix(scalar, num_blocks, stream); - self.unchecked_comparison_async(ct, &scalar_as_trivial, op, stream) + let scalar_as_trivial = self.create_trivial_radix(scalar, num_blocks, streams); + self.unchecked_comparison_async(ct, &scalar_as_trivial, op, streams) } } else { // Unsigned self.unchecked_signed_and_unsigned_scalar_comparison_async( - ct, scalar, op, false, stream, + ct, scalar, op, false, streams, ) } } /// # Safety /// - /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must - /// not be dropped until stream is synchronised + /// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must 
+ /// not be dropped until streams is synchronised pub unsafe fn unchecked_scalar_minmax_async( &self, ct: &T, scalar: Scalar, op: ComparisonType, - stream: &CudaStreams, + streams: &CudaStreams, ) -> T where T: CudaIntegerRadixCiphertext, @@ -327,16 +328,17 @@ impl CudaServerKey { .iter_as::() .collect::>(); - let d_scalar_blocks: CudaVec = CudaVec::from_cpu_async(&scalar_blocks, stream, 0); + let d_scalar_blocks: CudaVec = + CudaVec::from_cpu_async(&scalar_blocks, streams, streams.gpu_indexes[0]); let lwe_ciphertext_count = ct.as_ref().d_blocks.lwe_ciphertext_count(); - let mut result = ct.duplicate_async(stream); + let mut result = ct.duplicate_async(streams); match &self.bootstrapping_key { CudaBootstrappingKey::Classic(d_bsk) => { unchecked_scalar_comparison_integer_radix_kb_async( - stream, + streams, &mut result.as_mut().d_blocks.0.d_vec, &ct.as_ref().d_blocks.0.d_vec, &d_scalar_blocks, @@ -366,7 +368,7 @@ impl CudaServerKey { } CudaBootstrappingKey::MultiBit(d_multibit_bsk) => { unchecked_scalar_comparison_integer_radix_kb_async( - stream, + streams, &mut result.as_mut().d_blocks.0.d_vec, &ct.as_ref().d_blocks.0.d_vec, &d_scalar_blocks, @@ -401,45 +403,45 @@ impl CudaServerKey { /// # Safety /// - /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must - /// not be dropped until stream is synchronised + /// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until streams is synchronised pub unsafe fn unchecked_scalar_eq_async( &self, ct: &T, scalar: Scalar, - stream: &CudaStreams, + streams: &CudaStreams, ) -> CudaBooleanBlock where T: CudaIntegerRadixCiphertext, Scalar: DecomposableInto, { - self.unchecked_scalar_comparison_async(ct, scalar, ComparisonType::EQ, stream) + self.unchecked_scalar_comparison_async(ct, scalar, ComparisonType::EQ, streams) } pub fn unchecked_scalar_eq( &self, ct: &T, scalar: Scalar, - stream: &CudaStreams, + streams: &CudaStreams, ) -> CudaBooleanBlock where T: CudaIntegerRadixCiphertext, Scalar: DecomposableInto, { - let result = unsafe { self.unchecked_scalar_eq_async(ct, scalar, stream) }; - stream.synchronize(); + let result = unsafe { self.unchecked_scalar_eq_async(ct, scalar, streams) }; + streams.synchronize(); result } /// # Safety /// - /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must - /// not be dropped until stream is synchronised + /// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until streams is synchronised pub unsafe fn scalar_eq_async( &self, ct: &T, scalar: Scalar, - stream: &CudaStreams, + streams: &CudaStreams, ) -> CudaBooleanBlock where T: CudaIntegerRadixCiphertext, @@ -449,12 +451,12 @@ impl CudaServerKey { let lhs = if ct.block_carries_are_empty() { ct } else { - tmp_lhs = ct.duplicate_async(stream); - self.full_propagate_assign_async(&mut tmp_lhs, stream); + tmp_lhs = ct.duplicate_async(streams); + self.full_propagate_assign_async(&mut tmp_lhs, streams); &tmp_lhs }; - self.unchecked_scalar_eq_async(lhs, scalar, stream) + self.unchecked_scalar_eq_async(lhs, scalar, streams) } /// Compares for equality 2 ciphertexts @@ -473,12 +475,12 @@ impl CudaServerKey { /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS; /// /// let gpu_index = 0; - /// let mut stream = CudaStreams::new_single_gpu(gpu_index); + /// let mut streams = CudaStreams::new_single_gpu(gpu_index); /// /// let size = 4; /// /// // Generate 
the client key and the server key:
-    /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &stream);
+    /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &streams);
     ///
     /// let msg1 = 14u64;
     /// let msg2 = 97u64;
@@ -486,12 +488,12 @@ impl CudaServerKey {
     /// let ct1 = cks.encrypt(msg1);
     ///
     /// // Copy to GPU
-    /// let mut d_ct1 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct1, &stream);
+    /// let mut d_ct1 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct1, &streams);
     ///
-    /// let d_ct_res = sks.scalar_eq(&d_ct1, msg2, &stream);
+    /// let d_ct_res = sks.scalar_eq(&d_ct1, msg2, &streams);
     ///
     /// // Copy the result back to CPU
-    /// let ct_res = d_ct_res.to_boolean_block(&stream);
+    /// let ct_res = d_ct_res.to_boolean_block(&streams);
     ///
     /// // Decrypt:
     /// let dec_result = cks.decrypt_bool(&ct_res);
@@ -501,26 +503,26 @@ impl CudaServerKey {
         &self,
         ct: &T,
         scalar: Scalar,
-        stream: &CudaStreams,
+        streams: &CudaStreams,
     ) -> CudaBooleanBlock
     where
         T: CudaIntegerRadixCiphertext,
         Scalar: DecomposableInto,
     {
-        let result = unsafe { self.scalar_eq_async(ct, scalar, stream) };
-        stream.synchronize();
+        let result = unsafe { self.scalar_eq_async(ct, scalar, streams) };
+        streams.synchronize();
         result
     }
 
     /// # Safety
     ///
-    /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must
-    ///   not be dropped until stream is synchronised
+    /// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
+    ///   not be dropped until streams is synchronised
     pub unsafe fn scalar_ne_async(
         &self,
         ct: &T,
         scalar: Scalar,
-        stream: &CudaStreams,
+        streams: &CudaStreams,
     ) -> CudaBooleanBlock
     where
         T: CudaIntegerRadixCiphertext,
@@ -530,12 +532,12 @@ impl CudaServerKey {
         let lhs = if ct.block_carries_are_empty() {
             ct
         } else {
-            tmp_lhs = ct.duplicate_async(stream);
-            self.full_propagate_assign_async(&mut tmp_lhs, stream);
+            tmp_lhs = ct.duplicate_async(streams);
+            self.full_propagate_assign_async(&mut tmp_lhs, streams);
             &tmp_lhs
         };
 
-        self.unchecked_scalar_ne_async(lhs, scalar, stream)
+        self.unchecked_scalar_ne_async(lhs, scalar, streams)
     }
 
     /// Compares for equality 2 ciphertexts
@@ -554,12 +556,12 @@ impl CudaServerKey {
     /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS;
     ///
     /// let gpu_index = 0;
-    /// let mut stream = CudaStreams::new_single_gpu(gpu_index);
+    /// let mut streams = CudaStreams::new_single_gpu(gpu_index);
     ///
     /// let size = 4;
     ///
     /// // Generate the client key and the server key:
-    /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &stream);
+    /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &streams);
     ///
     /// let msg1 = 14u64;
     /// let msg2 = 97u64;
@@ -567,12 +569,12 @@ impl CudaServerKey {
     /// let ct1 = cks.encrypt(msg1);
     ///
     /// // Copy to GPU
-    /// let mut d_ct1 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct1, &stream);
+    /// let mut d_ct1 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct1, &streams);
     ///
-    /// let d_ct_res = sks.scalar_ne(&d_ct1, msg2, &stream);
+    /// let d_ct_res = sks.scalar_ne(&d_ct1, msg2, &streams);
     ///
     /// // Copy the result back to CPU
-    /// let ct_res = d_ct_res.to_boolean_block(&stream);
+    /// let ct_res = d_ct_res.to_boolean_block(&streams);
     ///
     /// // Decrypt:
     /// let dec_result = cks.decrypt_bool(&ct_res);
@@ -582,185 +584,185 @@ impl CudaServerKey {
         &self,
         ct: &T,
         scalar: Scalar,
-        stream: &CudaStreams,
+        streams: &CudaStreams,
     ) -> CudaBooleanBlock
     where
         Scalar: DecomposableInto,
         T: CudaIntegerRadixCiphertext,
     {
-        let result = unsafe { self.scalar_ne_async(ct, scalar, stream) };
-        stream.synchronize();
+        let result = unsafe { self.scalar_ne_async(ct, scalar, streams) };
+        streams.synchronize();
         result
     }
 
     /// # Safety
     ///
-    /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must
-    ///   not be dropped until stream is synchronised
+    /// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
+    ///   not be dropped until streams is synchronised
     pub unsafe fn unchecked_scalar_ne_async(
         &self,
         ct: &T,
         scalar: Scalar,
-        stream: &CudaStreams,
+        streams: &CudaStreams,
     ) -> CudaBooleanBlock
     where
         T: CudaIntegerRadixCiphertext,
         Scalar: DecomposableInto,
     {
-        self.unchecked_scalar_comparison_async(ct, scalar, ComparisonType::NE, stream)
+        self.unchecked_scalar_comparison_async(ct, scalar, ComparisonType::NE, streams)
     }
 
     pub fn unchecked_scalar_ne(
         &self,
         ct: &T,
         scalar: Scalar,
-        stream: &CudaStreams,
+        streams: &CudaStreams,
     ) -> CudaBooleanBlock
     where
         T: CudaIntegerRadixCiphertext,
         Scalar: DecomposableInto,
     {
-        let result = unsafe { self.unchecked_scalar_ne_async(ct, scalar, stream) };
-        stream.synchronize();
+        let result = unsafe { self.unchecked_scalar_ne_async(ct, scalar, streams) };
+        streams.synchronize();
         result
     }
 
     /// # Safety
     ///
-    /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must
-    ///   not be dropped until stream is synchronised
+    /// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
+    ///   not be dropped until streams is synchronised
    pub unsafe fn unchecked_scalar_gt_async(
         &self,
         ct: &T,
         scalar: Scalar,
-        stream: &CudaStreams,
+        streams: &CudaStreams,
     ) -> CudaBooleanBlock
     where
         Scalar: DecomposableInto,
         T: CudaIntegerRadixCiphertext,
     {
-        self.unchecked_scalar_comparison_async(ct, scalar, ComparisonType::GT, stream)
+        self.unchecked_scalar_comparison_async(ct, scalar, ComparisonType::GT, streams)
     }
 
     pub fn unchecked_scalar_gt(
         &self,
         ct: &T,
         scalar: Scalar,
-        stream: &CudaStreams,
+        streams: &CudaStreams,
     ) -> CudaBooleanBlock
     where
         Scalar: DecomposableInto,
         T: CudaIntegerRadixCiphertext,
     {
-        let result = unsafe { self.unchecked_scalar_gt_async(ct, scalar, stream) };
-        stream.synchronize();
+        let result = unsafe { self.unchecked_scalar_gt_async(ct, scalar, streams) };
+        streams.synchronize();
         result
     }
 
     /// # Safety
     ///
-    /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must
-    ///   not be dropped until stream is synchronised
+    /// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
+    ///   not be dropped until streams is synchronised
     pub unsafe fn unchecked_scalar_ge_async(
         &self,
         ct: &T,
         scalar: Scalar,
-        stream: &CudaStreams,
+        streams: &CudaStreams,
     ) -> CudaBooleanBlock
     where
         Scalar: DecomposableInto,
         T: CudaIntegerRadixCiphertext,
     {
-        self.unchecked_scalar_comparison_async(ct, scalar, ComparisonType::GE, stream)
+        self.unchecked_scalar_comparison_async(ct, scalar, ComparisonType::GE, streams)
     }
 
     pub fn unchecked_scalar_ge(
         &self,
         ct: &T,
         scalar: Scalar,
-        stream: &CudaStreams,
+        streams: &CudaStreams,
     ) -> CudaBooleanBlock
     where
         Scalar: DecomposableInto,
         T: CudaIntegerRadixCiphertext,
     {
-        let result = unsafe { self.unchecked_scalar_ge_async(ct, scalar, stream) };
-        stream.synchronize();
+        let result = unsafe { self.unchecked_scalar_ge_async(ct, scalar, streams) };
+        streams.synchronize();
         result
     }
 
     /// # Safety
     ///
-    /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must
-    ///   not be dropped until stream is synchronised
+    /// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
+    ///   not be dropped until streams is synchronised
     pub unsafe fn unchecked_scalar_lt_async(
         &self,
         ct: &T,
         scalar: Scalar,
-        stream: &CudaStreams,
+        streams: &CudaStreams,
     ) -> CudaBooleanBlock
     where
         Scalar: DecomposableInto,
         T: CudaIntegerRadixCiphertext,
     {
-        self.unchecked_scalar_comparison_async(ct, scalar, ComparisonType::LT, stream)
+        self.unchecked_scalar_comparison_async(ct, scalar, ComparisonType::LT, streams)
     }
 
     pub fn unchecked_scalar_lt(
         &self,
         ct: &T,
         scalar: Scalar,
-        stream: &CudaStreams,
+        streams: &CudaStreams,
     ) -> CudaBooleanBlock
     where
         Scalar: DecomposableInto,
         T: CudaIntegerRadixCiphertext,
     {
-        let result = unsafe { self.unchecked_scalar_lt_async(ct, scalar, stream) };
-        stream.synchronize();
+        let result = unsafe { self.unchecked_scalar_lt_async(ct, scalar, streams) };
+        streams.synchronize();
         result
     }
 
     /// # Safety
     ///
-    /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must
-    ///   not be dropped until stream is synchronised
+    /// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
+    ///   not be dropped until streams is synchronised
     pub unsafe fn unchecked_scalar_le_async(
         &self,
         ct: &T,
         scalar: Scalar,
-        stream: &CudaStreams,
+        streams: &CudaStreams,
     ) -> CudaBooleanBlock
     where
         Scalar: DecomposableInto,
         T: CudaIntegerRadixCiphertext,
     {
-        self.unchecked_scalar_comparison_async(ct, scalar, ComparisonType::LE, stream)
+        self.unchecked_scalar_comparison_async(ct, scalar, ComparisonType::LE, streams)
     }
 
     pub fn unchecked_scalar_le(
         &self,
         ct: &T,
         scalar: Scalar,
-        stream: &CudaStreams,
+        streams: &CudaStreams,
     ) -> CudaBooleanBlock
     where
         Scalar: DecomposableInto,
         T: CudaIntegerRadixCiphertext,
     {
-        let result = unsafe { self.unchecked_scalar_le_async(ct, scalar, stream) };
-        stream.synchronize();
+        let result = unsafe { self.unchecked_scalar_le_async(ct, scalar, streams) };
+        streams.synchronize();
         result
     }
 
     /// # Safety
     ///
-    /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must
-    ///   not be dropped until stream is synchronised
+    /// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
+    ///   not be dropped until streams is synchronised
     pub unsafe fn scalar_gt_async(
         &self,
         ct: &T,
         scalar: Scalar,
-        stream: &CudaStreams,
+        streams: &CudaStreams,
     ) -> CudaBooleanBlock
     where
         Scalar: DecomposableInto,
@@ -770,38 +772,38 @@ impl CudaServerKey {
         let lhs = if ct.block_carries_are_empty() {
             ct
         } else {
-            tmp_lhs = ct.duplicate_async(stream);
-            self.full_propagate_assign_async(&mut tmp_lhs, stream);
+            tmp_lhs = ct.duplicate_async(streams);
+            self.full_propagate_assign_async(&mut tmp_lhs, streams);
             &tmp_lhs
         };
 
-        self.unchecked_scalar_gt_async(lhs, scalar, stream)
+        self.unchecked_scalar_gt_async(lhs, scalar, streams)
     }
 
     pub fn scalar_gt(
         &self,
         ct: &T,
         scalar: Scalar,
-        stream: &CudaStreams,
+        streams: &CudaStreams,
     ) -> CudaBooleanBlock
     where
         Scalar: DecomposableInto,
         T: CudaIntegerRadixCiphertext,
     {
-        let result = unsafe { self.scalar_gt_async(ct, scalar, stream) };
-        stream.synchronize();
+        let result = unsafe { self.scalar_gt_async(ct, scalar, streams) };
+        streams.synchronize();
         result
     }
 
     /// # Safety
     ///
-    /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must
-    ///   not be dropped until stream is synchronised
+    /// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
+    ///   not be dropped until streams is synchronised
     pub unsafe fn scalar_ge_async(
         &self,
         ct: &T,
         scalar: Scalar,
-        stream: &CudaStreams,
+        streams: &CudaStreams,
     ) -> CudaBooleanBlock
     where
         Scalar: DecomposableInto,
@@ -811,38 +813,38 @@ impl CudaServerKey {
         let lhs = if ct.block_carries_are_empty() {
             ct
         } else {
-            tmp_lhs = ct.duplicate_async(stream);
-            self.full_propagate_assign_async(&mut tmp_lhs, stream);
+            tmp_lhs = ct.duplicate_async(streams);
+            self.full_propagate_assign_async(&mut tmp_lhs, streams);
             &tmp_lhs
         };
 
-        self.unchecked_scalar_ge_async(lhs, scalar, stream)
+        self.unchecked_scalar_ge_async(lhs, scalar, streams)
     }
 
     pub fn scalar_ge(
         &self,
         ct: &T,
         scalar: Scalar,
-        stream: &CudaStreams,
+        streams: &CudaStreams,
     ) -> CudaBooleanBlock
     where
         Scalar: DecomposableInto,
         T: CudaIntegerRadixCiphertext,
     {
-        let result = unsafe { self.scalar_ge_async(ct, scalar, stream) };
-        stream.synchronize();
+        let result = unsafe { self.scalar_ge_async(ct, scalar, streams) };
+        streams.synchronize();
         result
     }
 
     /// # Safety
     ///
-    /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must
-    ///   not be dropped until stream is synchronised
+    /// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
+    ///   not be dropped until streams is synchronised
     pub unsafe fn scalar_lt_async(
         &self,
         ct: &T,
         scalar: Scalar,
-        stream: &CudaStreams,
+        streams: &CudaStreams,
     ) -> CudaBooleanBlock
     where
         Scalar: DecomposableInto,
@@ -852,37 +854,37 @@ impl CudaServerKey {
         let lhs = if ct.block_carries_are_empty() {
             ct
         } else {
-            tmp_lhs = ct.duplicate_async(stream);
-            self.full_propagate_assign_async(&mut tmp_lhs, stream);
+            tmp_lhs = ct.duplicate_async(streams);
+            self.full_propagate_assign_async(&mut tmp_lhs, streams);
             &tmp_lhs
         };
 
-        self.unchecked_scalar_lt_async(lhs, scalar, stream)
+        self.unchecked_scalar_lt_async(lhs, scalar, streams)
     }
 
     pub fn scalar_lt(
         &self,
         ct: &T,
         scalar: Scalar,
-        stream: &CudaStreams,
+        streams: &CudaStreams,
     ) -> CudaBooleanBlock
     where
         Scalar: DecomposableInto,
         T: CudaIntegerRadixCiphertext,
     {
-        let result = unsafe { self.scalar_lt_async(ct, scalar, stream) };
-        stream.synchronize();
+        let result = unsafe { self.scalar_lt_async(ct, scalar, streams) };
+        streams.synchronize();
         result
     }
 
     /// # Safety
     ///
-    /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must
-    ///   not be dropped until stream is synchronised
+    /// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
+    ///   not be dropped until streams is synchronised
     pub unsafe fn scalar_le_async(
         &self,
         ct: &T,
         scalar: Scalar,
-        stream: &CudaStreams,
+        streams: &CudaStreams,
     ) -> CudaBooleanBlock
     where
         Scalar: DecomposableInto,
@@ -892,92 +894,102 @@ impl CudaServerKey {
         let lhs = if ct.block_carries_are_empty() {
             ct
         } else {
-            tmp_lhs = ct.duplicate_async(stream);
-            self.full_propagate_assign_async(&mut tmp_lhs, stream);
+            tmp_lhs = ct.duplicate_async(streams);
+            self.full_propagate_assign_async(&mut tmp_lhs, streams);
             &tmp_lhs
         };
 
-        self.unchecked_scalar_le_async(lhs, scalar, stream)
+        self.unchecked_scalar_le_async(lhs, scalar, streams)
     }
 
     pub fn scalar_le(
         &self,
         ct: &T,
         scalar: Scalar,
-        stream: &CudaStreams,
+        streams: &CudaStreams,
     ) -> CudaBooleanBlock
     where
         Scalar: DecomposableInto,
         T: CudaIntegerRadixCiphertext,
     {
-        let result = unsafe { self.scalar_le_async(ct, scalar, stream) };
-        stream.synchronize();
+        let result = unsafe { self.scalar_le_async(ct, scalar, streams) };
+        streams.synchronize();
         result
     }
 
     /// # Safety
     ///
-    /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must
-    ///   not be dropped until stream is synchronised
+    /// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
+    ///   not be dropped until streams is synchronised
     pub unsafe fn unchecked_scalar_max_async(
         &self,
         ct: &T,
         scalar: Scalar,
-        stream: &CudaStreams,
+        streams: &CudaStreams,
     ) -> T
     where
         Scalar: DecomposableInto,
         T: CudaIntegerRadixCiphertext,
     {
-        self.unchecked_scalar_minmax_async(ct, scalar, ComparisonType::MAX, stream)
+        self.unchecked_scalar_minmax_async(ct, scalar, ComparisonType::MAX, streams)
     }
 
-    pub fn unchecked_scalar_max(&self, ct: &T, scalar: Scalar, stream: &CudaStreams) -> T
+    pub fn unchecked_scalar_max(
+        &self,
+        ct: &T,
+        scalar: Scalar,
+        streams: &CudaStreams,
+    ) -> T
     where
         Scalar: DecomposableInto,
         T: CudaIntegerRadixCiphertext,
     {
-        let result = unsafe { self.unchecked_scalar_max_async(ct, scalar, stream) };
-        stream.synchronize();
+        let result = unsafe { self.unchecked_scalar_max_async(ct, scalar, streams) };
+        streams.synchronize();
         result
     }
 
     /// # Safety
     ///
-    /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must
-    ///   not be dropped until stream is synchronised
+    /// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
+    ///   not be dropped until streams is synchronised
     pub unsafe fn unchecked_scalar_min_async(
         &self,
         ct: &T,
         scalar: Scalar,
-        stream: &CudaStreams,
+        streams: &CudaStreams,
     ) -> T
     where
         Scalar: DecomposableInto,
         T: CudaIntegerRadixCiphertext,
     {
-        self.unchecked_scalar_minmax_async(ct, scalar, ComparisonType::MIN, stream)
+        self.unchecked_scalar_minmax_async(ct, scalar, ComparisonType::MIN, streams)
     }
 
-    pub fn unchecked_scalar_min(&self, ct: &T, scalar: Scalar, stream: &CudaStreams) -> T
+    pub fn unchecked_scalar_min(
+        &self,
+        ct: &T,
+        scalar: Scalar,
+        streams: &CudaStreams,
+    ) -> T
     where
         Scalar: DecomposableInto,
         T: CudaIntegerRadixCiphertext,
     {
-        let result = unsafe { self.unchecked_scalar_min_async(ct, scalar, stream) };
-        stream.synchronize();
+        let result = unsafe { self.unchecked_scalar_min_async(ct, scalar, streams) };
+        streams.synchronize();
         result
     }
 
     /// # Safety
     ///
-    /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must
-    ///   not be dropped until stream is synchronised
+    /// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
+    ///   not be dropped until streams is synchronised
     pub unsafe fn scalar_max_async(
         &self,
         ct: &T,
         scalar: Scalar,
-        stream: &CudaStreams,
+        streams: &CudaStreams,
     ) -> T
     where
         Scalar: DecomposableInto,
@@ -987,33 +999,33 @@ impl CudaServerKey {
         let lhs = if ct.block_carries_are_empty() {
             ct
         } else {
-            tmp_lhs = ct.duplicate_async(stream);
-            self.full_propagate_assign_async(&mut tmp_lhs, stream);
+            tmp_lhs = ct.duplicate_async(streams);
+            self.full_propagate_assign_async(&mut tmp_lhs, streams);
             &tmp_lhs
         };
 
-        self.unchecked_scalar_max_async(lhs, scalar, stream)
+        self.unchecked_scalar_max_async(lhs, scalar, streams)
     }
 
-    pub fn scalar_max(&self, ct: &T, scalar: Scalar, stream: &CudaStreams) -> T
+    pub fn scalar_max(&self, ct: &T, scalar: Scalar, streams: &CudaStreams) -> T
     where
         Scalar: DecomposableInto,
         T: CudaIntegerRadixCiphertext,
     {
-        let result = unsafe { self.scalar_max_async(ct, scalar, stream) };
-        stream.synchronize();
+        let result = unsafe { self.scalar_max_async(ct, scalar, streams) };
+        streams.synchronize();
         result
     }
 
     /// # Safety
     ///
-    /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must
-    ///   not be dropped until stream is synchronised
+    /// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
+    ///   not be dropped until streams is synchronised
     pub unsafe fn scalar_min_async(
         &self,
         ct: &T,
         scalar: Scalar,
-        stream: &CudaStreams,
+        streams: &CudaStreams,
     ) -> T
     where
         Scalar: DecomposableInto,
@@ -1023,21 +1035,21 @@ impl CudaServerKey {
         let lhs = if ct.block_carries_are_empty() {
             ct
         } else {
-            tmp_lhs = ct.duplicate_async(stream);
-            self.full_propagate_assign_async(&mut tmp_lhs, stream);
+            tmp_lhs = ct.duplicate_async(streams);
+            self.full_propagate_assign_async(&mut tmp_lhs, streams);
             &tmp_lhs
         };
 
-        self.unchecked_scalar_min_async(lhs, scalar, stream)
+        self.unchecked_scalar_min_async(lhs, scalar, streams)
     }
 
-    pub fn scalar_min(&self, ct: &T, scalar: Scalar, stream: &CudaStreams) -> T
+    pub fn scalar_min(&self, ct: &T, scalar: Scalar, streams: &CudaStreams) -> T
     where
         Scalar: DecomposableInto,
         T: CudaIntegerRadixCiphertext,
     {
-        let result = unsafe { self.scalar_min_async(ct, scalar, stream) };
-        stream.synchronize();
+        let result = unsafe { self.scalar_min_async(ct, scalar, streams) };
+        streams.synchronize();
         result
     }
 }
diff --git a/tfhe/src/integer/gpu/server_key/radix/scalar_mul.rs b/tfhe/src/integer/gpu/server_key/radix/scalar_mul.rs
index 9bab89c008..8d23090fd9 100644
--- a/tfhe/src/integer/gpu/server_key/radix/scalar_mul.rs
+++ b/tfhe/src/integer/gpu/server_key/radix/scalar_mul.rs
@@ -26,50 +26,59 @@ impl CudaServerKey {
     /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS;
     ///
     /// let gpu_index = 0;
-    /// let mut stream = CudaStreams::new_single_gpu(gpu_index);
+    /// let mut streams = CudaStreams::new_single_gpu(gpu_index);
     ///
     /// // We have 4 * 2 = 8 bits of message
     /// let size = 4;
-    /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut stream);
+    /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut streams);
     ///
     /// let msg = 30;
     /// let scalar = 3;
     ///
     /// let ct = cks.encrypt(msg);
-    /// let mut d_ct = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct, &mut stream);
+    /// let mut d_ct = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct, &mut streams);
     ///
     /// // Compute homomorphically a scalar multiplication:
-    /// let d_ct_res = sks.unchecked_scalar_mul(&d_ct, scalar, &mut stream);
-    /// let ct_res = d_ct_res.to_radix_ciphertext(&mut stream);
+    /// let d_ct_res = sks.unchecked_scalar_mul(&d_ct, scalar, &mut streams);
+    /// let ct_res = d_ct_res.to_radix_ciphertext(&mut streams);
     ///
     /// let clear: u64 = cks.decrypt(&ct_res);
     /// assert_eq!(scalar * msg, clear);
     /// ```
-    pub fn unchecked_scalar_mul(&self, ct: &T, scalar: Scalar, stream: &CudaStreams) -> T
+    pub fn unchecked_scalar_mul(
+        &self,
+        ct: &T,
+        scalar: Scalar,
+        streams: &CudaStreams,
+    ) -> T
     where
         Scalar: ScalarMultiplier + DecomposableInto + CastInto,
         T: CudaIntegerRadixCiphertext,
     {
-        let mut result = unsafe { ct.duplicate_async(stream) };
-        self.unchecked_scalar_mul_assign(&mut result, scalar, stream);
+        let mut result = unsafe { ct.duplicate_async(streams) };
+        self.unchecked_scalar_mul_assign(&mut result, scalar, streams);
         result
     }
 
     /// # Safety
     ///
-    /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must
-    ///   not be dropped until stream is synchronised
+    /// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
+    ///   not be dropped until streams is synchronised
     pub unsafe fn unchecked_scalar_mul_assign_async(
         &self,
         ct: &mut T,
         scalar: Scalar,
-        stream: &CudaStreams,
+        streams: &CudaStreams,
     ) where
         Scalar: ScalarMultiplier + DecomposableInto + CastInto,
         T: CudaIntegerRadixCiphertext,
     {
         if scalar == Scalar::ZERO {
-            ct.as_mut().d_blocks.0.d_vec.memset_async(0, stream, 0);
+            ct.as_mut()
+                .d_blocks
+                .0
+                .d_vec
+                .memset_async(0, streams, streams.gpu_indexes[0]);
             return;
         }
@@ -80,7 +89,7 @@ impl CudaServerKey {
         if scalar.is_power_of_two() {
             // Shifting cost one bivariate PBS so its always faster
             // than multiplying
-            self.unchecked_scalar_left_shift_assign_async(ct, scalar.ilog2() as u64, stream);
+            self.unchecked_scalar_left_shift_assign_async(ct, scalar.ilog2() as u64, streams);
             return;
         }
         let ciphertext = ct.as_mut();
@@ -104,7 +113,7 @@ impl CudaServerKey {
         match &self.bootstrapping_key {
             CudaBootstrappingKey::Classic(d_bsk) => {
                 unchecked_scalar_mul_integer_radix_kb_async(
-                    stream,
+                    streams,
                     &mut ct.as_mut().d_blocks.0.d_vec,
                     decomposed_scalar.as_slice(),
                     has_at_least_one_set.as_slice(),
@@ -129,7 +138,7 @@ impl CudaServerKey {
             }
             CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
                 unchecked_scalar_mul_integer_radix_kb_async(
-                    stream,
+                    streams,
                     &mut ct.as_mut().d_blocks.0.d_vec,
                     decomposed_scalar.as_slice(),
                     has_at_least_one_set.as_slice(),
@@ -161,15 +170,15 @@ impl CudaServerKey {
         &self,
         ct: &mut T,
         scalar: Scalar,
-        stream: &CudaStreams,
+        streams: &CudaStreams,
     ) where
         Scalar: ScalarMultiplier + DecomposableInto + CastInto,
         T: CudaIntegerRadixCiphertext,
     {
         unsafe {
-            self.unchecked_scalar_mul_assign_async(ct, scalar, stream);
+            self.unchecked_scalar_mul_assign_async(ct, scalar, streams);
         }
-        stream.synchronize();
+        streams.synchronize();
     }
 
     /// Computes homomorphically a multiplication between a scalar and a ciphertext.
@@ -189,63 +198,63 @@ impl CudaServerKey {
     /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS;
     ///
     /// let gpu_index = 0;
-    /// let mut stream = CudaStreams::new_single_gpu(gpu_index);
+    /// let mut streams = CudaStreams::new_single_gpu(gpu_index);
     ///
     /// // We have 4 * 2 = 8 bits of message
     /// let size = 4;
-    /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut stream);
+    /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut streams);
     ///
     /// let msg = 30;
     /// let scalar = 3;
     ///
     /// let ct = cks.encrypt(msg);
-    /// let mut d_ct = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct, &mut stream);
+    /// let mut d_ct = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct, &mut streams);
     ///
     /// // Compute homomorphically a scalar multiplication:
-    /// let d_ct_res = sks.scalar_mul(&d_ct, scalar, &mut stream);
-    /// let ct_res = d_ct_res.to_radix_ciphertext(&mut stream);
+    /// let d_ct_res = sks.scalar_mul(&d_ct, scalar, &mut streams);
+    /// let ct_res = d_ct_res.to_radix_ciphertext(&mut streams);
     ///
     /// let clear: u64 = cks.decrypt(&ct_res);
     /// assert_eq!(scalar * msg, clear);
     /// ```
-    pub fn scalar_mul(&self, ct: &T, scalar: Scalar, stream: &CudaStreams) -> T
+    pub fn scalar_mul(&self, ct: &T, scalar: Scalar, streams: &CudaStreams) -> T
     where
         Scalar: ScalarMultiplier + DecomposableInto + CastInto,
         T: CudaIntegerRadixCiphertext,
     {
-        let mut result = unsafe { ct.duplicate_async(stream) };
-        self.scalar_mul_assign(&mut result, scalar, stream);
+        let mut result = unsafe { ct.duplicate_async(streams) };
+        self.scalar_mul_assign(&mut result, scalar, streams);
         result
     }
 
     /// # Safety
     ///
-    /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must
-    ///   not be dropped until stream is synchronised
+    /// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
+    ///   not be dropped until streams is synchronised
     pub unsafe fn scalar_mul_assign_async(
         &self,
         ct: &mut T,
         scalar: Scalar,
-        stream: &CudaStreams,
+        streams: &CudaStreams,
     ) where
         Scalar: ScalarMultiplier + DecomposableInto + CastInto,
         T: CudaIntegerRadixCiphertext,
     {
         if !ct.block_carries_are_empty() {
-            self.full_propagate_assign_async(ct, stream);
+            self.full_propagate_assign_async(ct, streams);
         };
 
-        self.unchecked_scalar_mul_assign_async(ct, scalar, stream);
+        self.unchecked_scalar_mul_assign_async(ct, scalar, streams);
     }
 
-    pub fn scalar_mul_assign(&self, ct: &mut T, scalar: Scalar, stream: &CudaStreams)
+    pub fn scalar_mul_assign(&self, ct: &mut T, scalar: Scalar, streams: &CudaStreams)
     where
         Scalar: ScalarMultiplier + DecomposableInto + CastInto,
         T: CudaIntegerRadixCiphertext,
     {
         unsafe {
-            self.scalar_mul_assign_async(ct, scalar, stream);
+            self.scalar_mul_assign_async(ct, scalar, streams);
        }
-        stream.synchronize();
+        streams.synchronize();
     }
 }
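For reference, here is a minimal end-to-end sketch of the calling convention this patch settles on, with a single `streams: &CudaStreams` argument threaded through every GPU entry point. It is assembled from the doc-tests above; the import paths and the `gen_keys_radix_gpu` / `CudaUnsignedRadixCiphertext` helpers are assumed to be the same ones those doc-tests use, and the particular combination of a scalar multiplication followed by a scalar comparison is illustrative only, not part of the patch.

```rust
use tfhe::core_crypto::gpu::CudaStreams;
use tfhe::integer::gpu::ciphertext::CudaUnsignedRadixCiphertext;
use tfhe::integer::gpu::gen_keys_radix_gpu;
use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS;

fn main() {
    // A single-GPU stream set; every GPU server-key method now takes `&CudaStreams`.
    let gpu_index = 0;
    let streams = CudaStreams::new_single_gpu(gpu_index);

    // 4 blocks of 2-bit messages = 8 bits of plaintext space, as in the doc-tests.
    let size = 4;
    let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &streams);

    let msg = 30u64;
    let scalar = 3u64;

    // Encrypt on the CPU, then copy the radix ciphertext to the GPU.
    let ct = cks.encrypt(msg);
    let d_ct = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct, &streams);

    // Homomorphic scalar multiplication (carries are propagated first).
    let d_prod = sks.scalar_mul(&d_ct, scalar, &streams);

    // Homomorphic scalar comparison; the result is an encrypted boolean block.
    let d_is_gt = sks.scalar_gt(&d_prod, 50u64, &streams);

    // Copy the results back to the CPU and decrypt.
    let prod: u64 = cks.decrypt(&d_prod.to_radix_ciphertext(&streams));
    let is_gt = cks.decrypt_bool(&d_is_gt.to_boolean_block(&streams));

    assert_eq!(prod, msg * scalar);
    assert!(is_gt); // 90 > 50
}
```

The synchronous wrappers (`scalar_mul`, `scalar_gt`, `scalar_max`, ...) call `streams.synchronize()` internally, so no explicit synchronization is needed in this sketch; only the `*_async` variants require the caller to synchronize before dropping inputs, as their `# Safety` sections state.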