Skip to content

Commit

Permalink
fix(gpu): attempt to fix scalar comparison with 1 block
Browse files: browse the repository at this point in the history
  • Loading branch information
agnesLeroy committed Jul 22, 2024
1 parent 32262e0 commit 3f3860c
Show file tree
Hide file tree
Showing 6 changed files with 7 additions and 8 deletions.
2 changes: 1 addition & 1 deletion backends/tfhe-cuda-backend/cuda/include/integer.h
Original file line number Diff line number Diff line change
Expand Up @@ -1736,7 +1736,7 @@ template <typename Torus> struct int_arithmetic_scalar_shift_buffer {
int_radix_params params,
uint32_t num_radix_blocks,
bool allocate_gpu_memory) {
active_gpu_count = get_active_gpu_count(num_radix_blocks, gpu_count);
active_gpu_count = get_active_gpu_count(1, gpu_count);
// In the arithmetic shift, a PBS has to be applied to the last rotated
// block twice: once to shift it, once to compute the padding block to be
// copied onto all blocks to the left of the last rotated block
Expand Down
4 changes: 3 additions & 1 deletion backends/tfhe-cuda-backend/cuda/src/integer/cmux.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,10 @@ __host__ void host_integer_radix_cmux_kb(
mem_ptr->predicate_lut, bsks, ksks, num_radix_blocks);
}
}
for (uint j = 0; j < gpu_count; j++) {
for (uint j = 0; j < mem_ptr->zero_if_true_buffer->active_gpu_count; j++) {
cuda_synchronize_stream(true_streams[j], gpu_indexes[j]);
}
for (uint j = 0; j < mem_ptr->zero_if_false_buffer->active_gpu_count; j++) {
cuda_synchronize_stream(false_streams[j], gpu_indexes[j]);
}

Expand Down
1 change: 0 additions & 1 deletion backends/tfhe-cuda-backend/cuda/src/integer/comparison.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,6 @@ __host__ void host_compare_with_zero_equality(
int_comparison_buffer<Torus> *mem_ptr, void **bsks, Torus **ksks,
int32_t num_radix_blocks, int_radix_lut<Torus> *zero_comparison) {

cudaSetDevice(gpu_indexes[0]);
auto params = mem_ptr->params;
auto big_lwe_dimension = params.big_lwe_dimension;
auto message_modulus = params.message_modulus;
Expand Down
5 changes: 2 additions & 3 deletions (file path not captured in this page extract)
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ __host__ void integer_radix_unsigned_scalar_difference_check_kb(
mem_ptr, bsks, ksks, num_msb_radix_blocks, mem_ptr->is_zero_lut);
}
}
for (uint j = 0; j < gpu_count; j++) {
for (uint j = 0; j < mem_ptr->active_gpu_count; j++) {
cuda_synchronize_stream(lsb_streams[j], gpu_indexes[j]);
cuda_synchronize_stream(msb_streams[j], gpu_indexes[j]);
}
Expand Down Expand Up @@ -205,7 +205,6 @@ __host__ void integer_radix_signed_scalar_difference_check_kb(
std::function<Torus(Torus)> sign_handler_f, void **bsks, Torus **ksks,
uint32_t total_num_radix_blocks, uint32_t total_num_scalar_blocks) {

cudaSetDevice(gpu_indexes[0]);
auto params = mem_ptr->params;
auto big_lwe_dimension = params.big_lwe_dimension;
auto glwe_dimension = params.glwe_dimension;
Expand Down Expand Up @@ -737,7 +736,7 @@ __host__ void host_integer_radix_scalar_equality_check_kb(
}
}

for (uint j = 0; j < gpu_count; j++) {
for (uint j = 0; j < mem_ptr->active_gpu_count; j++) {
cuda_synchronize_stream(lsb_streams[j], gpu_indexes[j]);
cuda_synchronize_stream(msb_streams[j], gpu_indexes[j]);
}
Expand Down
2 changes: 1 addition & 1 deletion (file path not captured in this page extract)
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ __host__ void host_integer_radix_arithmetic_scalar_shift_kb_inplace(
}
}
}
for (uint j = 0; j < gpu_count; j++) {
for (uint j = 0; j < mem->active_gpu_count; j++) {
cuda_synchronize_stream(mem->local_streams_1[j], gpu_indexes[j]);
cuda_synchronize_stream(mem->local_streams_2[j], gpu_indexes[j]);
}
Expand Down
1 change: 0 additions & 1 deletion (file path not captured in this page extract)
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ int cuda_setup_multi_gpu() {
num_used_gpus += 1;
}
} else {
int has_peer_access_to_device_0;
for (int i = 1; i < num_gpus; i++)
num_used_gpus += 1;
}
Expand Down

0 comments on commit 3f3860c

Please sign in to comment.