From 7d0faa81cbc45458bdc0c0caa5f52793755fd193 Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Wed, 4 Sep 2024 11:58:34 +0000 Subject: [PATCH] rocm --- .../test_cli_rocm_pytorch_multi_gpu.yaml | 5 +---- .../test_cli_rocm_pytorch_single_gpu.yaml | 5 +---- Makefile | 12 ++++++------ docker/cuda/Dockerfile | 6 ++++-- docker/rocm/Dockerfile | 14 +++++++++++--- optimum_benchmark/backends/pytorch/backend.py | 18 +++++++++++------- scripts/install_quantization_libs.py | 4 +++- 7 files changed, 37 insertions(+), 27 deletions(-) diff --git a/.github/workflows/test_cli_rocm_pytorch_multi_gpu.yaml b/.github/workflows/test_cli_rocm_pytorch_multi_gpu.yaml index 9b8ba321..050b53e1 100644 --- a/.github/workflows/test_cli_rocm_pytorch_multi_gpu.yaml +++ b/.github/workflows/test_cli_rocm_pytorch_multi_gpu.yaml @@ -49,8 +49,5 @@ jobs: pip install -e .[testing,diffusers,timm,deepspeed,peft,autoawq,auto-gptq] "deepspeed<0.15" - name: Run tests - env: - HF_TOKEN: ${{ secrets.HF_TOKEN }} - PUSH_REPO_ID: optimum-benchmark/rocm run: | - pytest -x -s -k "cli and cuda and pytorch and (dp or ddp or device_map or deepspeed) and not (bnb or awq)" + pytest -x -s -k "cli and cuda and pytorch and (dp or ddp or device_map or deepspeed) and not bnb" diff --git a/.github/workflows/test_cli_rocm_pytorch_single_gpu.yaml b/.github/workflows/test_cli_rocm_pytorch_single_gpu.yaml index 3f19ad11..59fdd4f6 100644 --- a/.github/workflows/test_cli_rocm_pytorch_single_gpu.yaml +++ b/.github/workflows/test_cli_rocm_pytorch_single_gpu.yaml @@ -49,8 +49,5 @@ jobs: pip install -e .[testing,diffusers,timm,peft,autoawq,auto-gptq] - name: Run tests - env: - HF_TOKEN: ${{ secrets.HF_TOKEN }} - PUSH_REPO_ID: optimum-benchmark/rocm run: | - pytest -x -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed) and not (bnb or awq or gptq)" + pytest -x -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed) and not bnb" diff --git a/Makefile b/Makefile index 
ffa2c7c7..0e14a5db 100644 --- a/Makefile +++ b/Makefile @@ -19,7 +19,7 @@ install: ## Build docker build_cpu_image: - docker build -t optimum-benchmark:latest-cpu docker/cpu + docker build -t optimum-benchmark:latest-cpu -f docker/cpu/Dockerfile . docker build --build-arg IMAGE=optimum-benchmark:latest-cpu --build-arg USER_ID=$(USER_ID) --build-arg GROUP_ID=$(GROUP_ID) -t optimum-benchmark:latest-cpu docker/unroot build_cuda_image: @@ -27,11 +27,11 @@ build_cuda_image: docker build --build-arg IMAGE=optimum-benchmark:latest-cuda --build-arg USER_ID=$(USER_ID) --build-arg GROUP_ID=$(GROUP_ID) -t optimum-benchmark:latest-cuda docker/unroot build_cuda_ort_image: - docker build -t optimum-benchmark:latest-cuda-ort docker/cuda-ort + docker build -t optimum-benchmark:latest-cuda-ort -f docker/cuda-ort/Dockerfile . docker build --build-arg IMAGE=optimum-benchmark:latest-cuda-ort --build-arg USER_ID=$(USER_ID) --build-arg GROUP_ID=$(GROUP_ID) -t optimum-benchmark:latest-cuda-ort docker/unroot build_rocm_image: - docker build -t optimum-benchmark:latest-rocm docker/rocm + docker build -t optimum-benchmark:latest-rocm -f docker/rocm/Dockerfile . 
docker build --build-arg IMAGE=optimum-benchmark:latest-rocm --build-arg USER_ID=$(USER_ID) --build-arg GROUP_ID=$(GROUP_ID) -t optimum-benchmark:latest-rocm docker/unroot # Run docker @@ -111,7 +111,7 @@ install_cli_cuda_pytorch: pip install -e .[testing,timm,diffusers,peft,autoawq,auto-gptq,bitsandbytes,deepspeed] install_cli_rocm_pytorch: - pip install -e .[testing,timm,diffusers,peft,autoawq,auto-gptq,deepspeed] + pip install -e .[testing,timm,diffusers,peft,autoawq,auto-gptq] "deepspeed<0.15" install_cli_cuda_torch_ort: pip install -e .[testing,timm,diffusers,peft,torch-ort,deepspeed] @@ -167,10 +167,10 @@ test_cli_cuda_torch_ort_single_gpu: pytest -s -k "cli and cuda and torch-ort and not (dp or ddp or device_map or deepspeed) and not peft" test_cli_rocm_pytorch_multi_gpu: - pytest -s -k "cli and rocm and pytorch and (dp or ddp or device_map or deepspeed) and not (bnb or awq)" + pytest -s -k "cli and cuda and pytorch and (dp or ddp or device_map or deepspeed) and not bnb" test_cli_rocm_pytorch_single_gpu: - pytest -s -k "cli and rocm and pytorch and not (dp or ddp or device_map or deepspeed) and not (bnb or awq)" + pytest -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed) and not bnb" test_cli_llama_cpp: pytest -s -k "llama_cpp" diff --git a/docker/cuda/Dockerfile b/docker/cuda/Dockerfile index c45ab973..fa43fa49 100644 --- a/docker/cuda/Dockerfile +++ b/docker/cuda/Dockerfile @@ -24,7 +24,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ python3.10 python3-pip python3.10-dev && \ apt-get clean && rm -rf /var/lib/apt/lists/* && \ update-alternatives --install /usr/bin/python python /usr/bin/python3.10 1 && \ - pip install --no-cache-dir --upgrade pip setuptools wheel requests ninja + pip install --no-cache-dir --upgrade pip setuptools wheel requests # Install PyTorch ARG TORCH_CUDA=cu124 @@ -39,6 +39,8 @@ else \ fi # Install quantization libraries from source +ENV CUDA_VERSION=12.4 ENV 
TORCH_CUDA_ARCH_LIST="6.0 7.0 7.5 8.0 8.6 9.0+PTX" + COPY scripts/install_quantization_libs.py /internal/install_quantization_libs.py -RUN python internal/install_quantization_libs.py --install-autogptq-from-source +RUN python internal/install_quantization_libs.py --install-autogptq-from-source --install-autoawq-from-source diff --git a/docker/rocm/Dockerfile b/docker/rocm/Dockerfile index 7dfd3719..611d058a 100644 --- a/docker/rocm/Dockerfile +++ b/docker/rocm/Dockerfile @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -ARG ROCM_VERSION=6.1.2 +ARG ROCM_VERSION=5.7.1 ARG UBUNTU_VERSION=22.04 FROM rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION} @@ -21,15 +21,16 @@ FROM rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION} ENV PATH="/opt/rocm/bin:${PATH}" ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update && apt-get upgrade -y && apt-get install -y --no-install-recommends \ + rocsparse-dev hipsparse-dev rocthrust-dev rocblas-dev hipblas-dev \ sudo build-essential git bash-completion \ python3.10 python3-pip python3.10-dev && \ apt-get clean && rm -rf /var/lib/apt/lists/* && \ update-alternatives --install /usr/bin/python python /usr/bin/python3.10 1 && \ - pip install --no-cache-dir --upgrade pip setuptools wheel && \ + pip install --no-cache-dir --upgrade pip setuptools wheel requests && \ cd /opt/rocm/share/amd_smi && pip install . 
# Install PyTorch -ARG TORCH_ROCM=rocm6.1 +ARG TORCH_ROCM=rocm5.7 ARG TORCH_VERSION=stable RUN if [ "${TORCH_VERSION}" = "stable" ]; then \ @@ -39,3 +40,10 @@ elif [ "${TORCH_VERSION}" = "nightly" ]; then \ else \ pip install --no-cache-dir torch==${TORCH_VERSION} torchvision torchaudio --index-url https://download.pytorch.org/whl/${TORCH_ROCM} ; \ fi + +# Install quantization libraries from source +ENV ROCM_VERSION=5.7 +ENV PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx1030;gfx1100" + +COPY scripts/install_quantization_libs.py /internal/install_quantization_libs.py +RUN python internal/install_quantization_libs.py --install-autogptq-from-source --install-autoawq-from-source \ No newline at end of file diff --git a/optimum_benchmark/backends/pytorch/backend.py b/optimum_benchmark/backends/pytorch/backend.py index dd0523f9..fcf522b5 100644 --- a/optimum_benchmark/backends/pytorch/backend.py +++ b/optimum_benchmark/backends/pytorch/backend.py @@ -291,9 +291,11 @@ def process_quantization_config(self) -> None: import exllamav2_kernels # noqa: F401 except ImportError: raise ImportError( - "GPTQ quantization requires the AutoGPTQ package. " - "Please install it from source at `https://github.com/AutoGPTQ/AutoGPTQ`" - "Or use `python scripts/install_quantization_libs.py --install-autogptq-from-source` from our repository" + "Tried to import `exllamav2_kernels` but failed. " + "This means that the AutoGPTQ package is either not installed or not compiled with the right torch version. " + "Please install it from source following the instructions at `https://github.com/AutoGPTQ/AutoGPTQ` " + "Or use `python scripts/install_quantization_libs.py --install-autogptq-from-source` in " + "`optimum-benchmark` repository at `https://github.com/huggingface/optimum-benchmark`."
) self.quantization_config = GPTQConfig( @@ -303,12 +305,14 @@ def process_quantization_config(self) -> None: self.logger.info("\t+ Processing AWQ config") try: - import awq_ext # noqa: F401 + import exlv2_ext # noqa: F401 except ImportError: raise ImportError( - "AWQ quantization requires the AutoAWQ package. " - "Please install it from source at `https://github.com/casper-hansen/AutoAWQ`" - "Or use `python scripts/install_quantization_libs.py --install-autoawq-from-source` from our repository" + "Tried to import `exlv2_ext` but failed. " + "This means that the AutoAWQ package is either not installed or not compiled with the right torch version. " + "Please install it from source following the instructions at `https://github.com/casper-hansen/AutoAWQ` " + "Or use `python scripts/install_quantization_libs.py --install-autoawq-from-source` in " + "`optimum-benchmark` repository at `https://github.com/huggingface/optimum-benchmark`." ) self.quantization_config = AwqConfig( diff --git a/scripts/install_quantization_libs.py b/scripts/install_quantization_libs.py index 3e1ff9aa..9feebbc1 100644 --- a/scripts/install_quantization_libs.py +++ b/scripts/install_quantization_libs.py @@ -95,7 +95,9 @@ def main(): help="Install AutoAWQ and AutoAWQ_kernels packages from source.", ) parser.add_argument( - "--install-autogptq-from-source", action="store_true", help="Install AutoGPTQ package from source." + "--install-autogptq-from-source", + action="store_true", + help="Install AutoGPTQ package from source.", ) args = parser.parse_args()