From 7d0faa81cbc45458bdc0c0caa5f52793755fd193 Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Wed, 4 Sep 2024 11:58:34 +0000 Subject: [PATCH] rocm --- .../test_cli_rocm_pytorch_multi_gpu.yaml | 5 +---- .../test_cli_rocm_pytorch_single_gpu.yaml | 5 +---- Makefile | 12 ++++++------ docker/cuda/Dockerfile | 6 ++++-- docker/rocm/Dockerfile | 14 +++++++++++--- optimum_benchmark/backends/pytorch/backend.py | 18 +++++++++++------- scripts/install_quantization_libs.py | 4 +++- 7 files changed, 37 insertions(+), 27 deletions(-) diff --git a/.github/workflows/test_cli_rocm_pytorch_multi_gpu.yaml b/.github/workflows/test_cli_rocm_pytorch_multi_gpu.yaml index 9b8ba321..050b53e1 100644 --- a/.github/workflows/test_cli_rocm_pytorch_multi_gpu.yaml +++ b/.github/workflows/test_cli_rocm_pytorch_multi_gpu.yaml @@ -49,8 +49,5 @@ jobs: pip install -e .[testing,diffusers,timm,deepspeed,peft,autoawq,auto-gptq] "deepspeed<0.15" - name: Run tests - env: - HF_TOKEN: ${{ secrets.HF_TOKEN }} - PUSH_REPO_ID: optimum-benchmark/rocm run: | - pytest -x -s -k "cli and cuda and pytorch and (dp or ddp or device_map or deepspeed) and not (bnb or awq)" + pytest -x -s -k "cli and cuda and pytorch and (dp or ddp or device_map or deepspeed) and not bnb" diff --git a/.github/workflows/test_cli_rocm_pytorch_single_gpu.yaml b/.github/workflows/test_cli_rocm_pytorch_single_gpu.yaml index 3f19ad11..59fdd4f6 100644 --- a/.github/workflows/test_cli_rocm_pytorch_single_gpu.yaml +++ b/.github/workflows/test_cli_rocm_pytorch_single_gpu.yaml @@ -49,8 +49,5 @@ jobs: pip install -e .[testing,diffusers,timm,peft,autoawq,auto-gptq] - name: Run tests - env: - HF_TOKEN: ${{ secrets.HF_TOKEN }} - PUSH_REPO_ID: optimum-benchmark/rocm run: | - pytest -x -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed) and not (bnb or awq or gptq)" + pytest -x -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed) and not bnb" diff --git a/Makefile b/Makefile index 
ffa2c7c7..0e14a5db 100644 --- a/Makefile +++ b/Makefile @@ -19,7 +19,7 @@ install: ## Build docker build_cpu_image: - docker build -t optimum-benchmark:latest-cpu docker/cpu + docker build -t optimum-benchmark:latest-cpu -f docker/cpu/Dockerfile . docker build --build-arg IMAGE=optimum-benchmark:latest-cpu --build-arg USER_ID=$(USER_ID) --build-arg GROUP_ID=$(GROUP_ID) -t optimum-benchmark:latest-cpu docker/unroot build_cuda_image: @@ -27,11 +27,11 @@ build_cuda_image: docker build --build-arg IMAGE=optimum-benchmark:latest-cuda --build-arg USER_ID=$(USER_ID) --build-arg GROUP_ID=$(GROUP_ID) -t optimum-benchmark:latest-cuda docker/unroot build_cuda_ort_image: - docker build -t optimum-benchmark:latest-cuda-ort docker/cuda-ort + docker build -t optimum-benchmark:latest-cuda-ort -f docker/cuda-ort/Dockerfile . docker build --build-arg IMAGE=optimum-benchmark:latest-cuda-ort --build-arg USER_ID=$(USER_ID) --build-arg GROUP_ID=$(GROUP_ID) -t optimum-benchmark:latest-cuda-ort docker/unroot build_rocm_image: - docker build -t optimum-benchmark:latest-rocm docker/rocm + docker build -t optimum-benchmark:latest-rocm -f docker/rocm/Dockerfile . 
docker build --build-arg IMAGE=optimum-benchmark:latest-rocm --build-arg USER_ID=$(USER_ID) --build-arg GROUP_ID=$(GROUP_ID) -t optimum-benchmark:latest-rocm docker/unroot # Run docker @@ -111,7 +111,7 @@ install_cli_cuda_pytorch: pip install -e .[testing,timm,diffusers,peft,autoawq,auto-gptq,bitsandbytes,deepspeed] install_cli_rocm_pytorch: - pip install -e .[testing,timm,diffusers,peft,autoawq,auto-gptq,deepspeed] + pip install -e .[testing,timm,diffusers,peft,autoawq,auto-gptq] "deepspeed<0.15" install_cli_cuda_torch_ort: pip install -e .[testing,timm,diffusers,peft,torch-ort,deepspeed] @@ -167,10 +167,10 @@ test_cli_cuda_torch_ort_single_gpu: pytest -s -k "cli and cuda and torch-ort and not (dp or ddp or device_map or deepspeed) and not peft" test_cli_rocm_pytorch_multi_gpu: - pytest -s -k "cli and rocm and pytorch and (dp or ddp or device_map or deepspeed) and not (bnb or awq)" + pytest -s -k "cli and cuda and pytorch and (dp or ddp or device_map or deepspeed) and not bnb" test_cli_rocm_pytorch_single_gpu: - pytest -s -k "cli and rocm and pytorch and not (dp or ddp or device_map or deepspeed) and not (bnb or awq)" + pytest -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed) and not bnb" test_cli_llama_cpp: pytest -s -k "llama_cpp" diff --git a/docker/cuda/Dockerfile b/docker/cuda/Dockerfile index c45ab973..fa43fa49 100644 --- a/docker/cuda/Dockerfile +++ b/docker/cuda/Dockerfile @@ -24,7 +24,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ python3.10 python3-pip python3.10-dev && \ apt-get clean && rm -rf /var/lib/apt/lists/* && \ update-alternatives --install /usr/bin/python python /usr/bin/python3.10 1 && \ - pip install --no-cache-dir --upgrade pip setuptools wheel requests ninja + pip install --no-cache-dir --upgrade pip setuptools wheel requests # Install PyTorch ARG TORCH_CUDA=cu124 @@ -39,6 +39,8 @@ else \ fi # Install quantization libraries from source +ENV CUDA_VERSION=12.4 ENV 
TORCH_CUDA_ARCH_LIST="6.0 7.0 7.5 8.0 8.6 9.0+PTX" + COPY scripts/install_quantization_libs.py /internal/install_quantization_libs.py -RUN python internal/install_quantization_libs.py --install-autogptq-from-source +RUN python internal/install_quantization_libs.py --install-autogptq-from-source --install-autoawq-from-source diff --git a/docker/rocm/Dockerfile b/docker/rocm/Dockerfile index 7dfd3719..611d058a 100644 --- a/docker/rocm/Dockerfile +++ b/docker/rocm/Dockerfile @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -ARG ROCM_VERSION=6.1.2 +ARG ROCM_VERSION=5.7.1 ARG UBUNTU_VERSION=22.04 FROM rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION} @@ -21,15 +21,16 @@ FROM rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION} ENV PATH="/opt/rocm/bin:${PATH}" ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update && apt-get upgrade -y && apt-get install -y --no-install-recommends \ + rocsparse-dev hipsparse-dev rocthrust-dev rocblas-dev hipblas-dev \ sudo build-essential git bash-completion \ python3.10 python3-pip python3.10-dev && \ apt-get clean && rm -rf /var/lib/apt/lists/* && \ update-alternatives --install /usr/bin/python python /usr/bin/python3.10 1 && \ - pip install --no-cache-dir --upgrade pip setuptools wheel && \ + pip install --no-cache-dir --upgrade pip setuptools wheel requests && \ cd /opt/rocm/share/amd_smi && pip install . 
# Install PyTorch -ARG TORCH_ROCM=rocm6.1 +ARG TORCH_ROCM=rocm5.7 ARG TORCH_VERSION=stable RUN if [ "${TORCH_VERSION}" = "stable" ]; then \ @@ -39,3 +40,10 @@ elif [ "${TORCH_VERSION}" = "nightly" ]; then \ else \ pip install --no-cache-dir torch==${TORCH_VERSION} torchvision torchaudio --index-url https://download.pytorch.org/whl/${TORCH_ROCM} ; \ fi + +# Install quantization libraries from source +ENV ROCM_VERSION=5.7 +ENV PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx1030;gfx1100" + +COPY scripts/install_quantization_libs.py /internal/install_quantization_libs.py +RUN python internal/install_quantization_libs.py --install-autogptq-from-source --install-autoawq-from-source \ No newline at end of file diff --git a/optimum_benchmark/backends/pytorch/backend.py b/optimum_benchmark/backends/pytorch/backend.py index dd0523f9..fcf522b5 100644 --- a/optimum_benchmark/backends/pytorch/backend.py +++ b/optimum_benchmark/backends/pytorch/backend.py @@ -291,9 +291,11 @@ def process_quantization_config(self) -> None: import exllamav2_kernels # noqa: F401 except ImportError: raise ImportError( - "GPTQ quantization requires the AutoGPTQ package. " - "Please install it from source at `https://github.com/AutoGPTQ/AutoGPTQ`" - "Or use `python scripts/install_quantization_libs.py --install-autogptq-from-source` from our repository" + "Tried to import `exllamav2_kernels` but failed. " + "This means that the AutoGPTQ package is either not installed or not compiled with the right torch version. " + "Please install it from source following the instructions at `https://github.com/AutoGPTQ/AutoGPTQ` " + "Or use `python scripts/install_quantization_libs.py --install-autogptq-from-source` in " + "`optimum-benchmark` repository at `https://github.com/huggingface/optimum-benchmark`."
) self.quantization_config = GPTQConfig( @@ -303,12 +305,14 @@ def process_quantization_config(self) -> None: self.logger.info("\t+ Processing AWQ config") try: - import awq_ext # noqa: F401 + import exlv2_ext # noqa: F401 except ImportError: raise ImportError( - "AWQ quantization requires the AutoAWQ package. " - "Please install it from source at `https://github.com/casper-hansen/AutoAWQ`" - "Or use `python scripts/install_quantization_libs.py --install-autoawq-from-source` from our repository" + "Tried to import `exlv2_ext` but failed. " + "This means that the AutoAWQ package is either not installed or not compiled with the right torch version. " + "Please install it from source following the instructions at `https://github.com/casper-hansen/AutoAWQ` " + "Or use `python scripts/install_quantization_libs.py --install-autoawq-from-source` in " + "`optimum-benchmark` repository at `https://github.com/huggingface/optimum-benchmark`." ) self.quantization_config = AwqConfig( diff --git a/scripts/install_quantization_libs.py b/scripts/install_quantization_libs.py index 3e1ff9aa..9feebbc1 100644 --- a/scripts/install_quantization_libs.py +++ b/scripts/install_quantization_libs.py @@ -95,7 +95,9 @@ def main(): help="Install AutoAWQ and AutoAWQ_kernels packages from source.", ) parser.add_argument( - "--install-autogptq-from-source", action="store_true", help="Install AutoGPTQ package from source." + "--install-autogptq-from-source", + action="store_true", + help="Install AutoGPTQ package from source.", ) args = parser.parse_args()