Skip to content

Commit

Permalink
fix(gpu): dispatch/gather inputs and outputs to the ks and pbs on all…
Browse files Browse the repository at this point in the history
… GPUs
  • Loading branch information
agnesLeroy committed Jul 22, 2024
1 parent 95ef13f commit ae8a5a9
Show file tree
Hide file tree
Showing 34 changed files with 856 additions and 510 deletions.
22 changes: 17 additions & 5 deletions .github/workflows/integer_multi_bit_multi_gpu_benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ jobs:
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
backend: aws
profile: multi-gpu-test
backend: hyperstack
profile: multi-h100-nvlink

cuda-integer-multi-bit-multi-gpu-benchmarks:
name: Execute multi GPU integer multi-bit benchmarks
Expand All @@ -62,11 +62,23 @@ jobs:
include:
- os: ubuntu-22.04
cuda: "12.2"
gcc: 9
gcc: 11
env:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}

CMAKE_VERSION: 3.29.6
steps:
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y checkinstall zlib1g-dev libssl-dev
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
cd cmake-${{ env.CMAKE_VERSION }}
./bootstrap
make -j"$(nproc)"
sudo make install
- name: Checkout tfhe-rs repo with tags
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
with:
Expand Down Expand Up @@ -135,7 +147,7 @@ jobs:
run: |
python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
--database tfhe_rs \
--hardware "p3.8xlarge" \
--hardware "n3-H100x8-nvlink" \
--backend gpu \
--project-version "${{ env.COMMIT_HASH }}" \
--branch ${{ github.ref_name }} \
Expand Down
22 changes: 17 additions & 5 deletions .github/workflows/integer_multi_gpu_full_benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ jobs:
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
backend: aws
profile: multi-gpu-test
backend: hyperstack
profile: multi-h100-nvlink

cuda-integer-full-multi-gpu-benchmarks:
name: Execute multi GPU integer benchmarks for all operations flavor
Expand All @@ -54,11 +54,23 @@ jobs:
include:
- os: ubuntu-22.04
cuda: "12.2"
gcc: 9
gcc: 11
env:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}

CMAKE_VERSION: 3.29.6
steps:
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y checkinstall zlib1g-dev libssl-dev
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
cd cmake-${{ env.CMAKE_VERSION }}
./bootstrap
make -j"$(nproc)"
sudo make install
- name: Checkout tfhe-rs repo with tags
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
with:
Expand Down Expand Up @@ -117,7 +129,7 @@ jobs:
run: |
python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
--database tfhe_rs \
--hardware "p3.8xlarge" \
--hardware "n3-H100x8-nvlink" \
--backend gpu \
--project-version "${{ env.COMMIT_HASH }}" \
--branch ${{ github.ref_name }} \
Expand Down
16 changes: 16 additions & 0 deletions backends/tfhe-cuda-backend/cuda/include/helper_multi_gpu.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#ifndef HELPER_MULTI_GPU_H
#define HELPER_MULTI_GPU_H
#include <mutex>
#include <variant>
#include <vector>

extern std::mutex m;
extern bool p2p_enabled;
Expand All @@ -9,6 +11,20 @@ extern "C" {
int cuda_setup_multi_gpu();
}

// Define a variant type that can be either a vector or a single pointer
template <typename Torus>
using LweArrayVariant = std::variant<std::vector<Torus *>, Torus *>;

// Macro to define the visitor logic using std::holds_alternative for vectors
#define GET_VARIANT_ELEMENT(variant, index) \
[&] { \
if (std::holds_alternative<std::vector<Torus *>>(variant)) { \
return std::get<std::vector<Torus *>>(variant)[index]; \
} else { \
return std::get<Torus *>(variant); \
} \
}()

int get_active_gpu_count(int num_inputs, int gpu_count);

int get_num_inputs_on_gpu(int total_num_inputs, int gpu_index, int gpu_count);
Expand Down
Loading

0 comments on commit ae8a5a9

Please sign in to comment.