Build from source quantization packages (#239)

Co-authored-by: IlyasMoutawwakil <moutawwakil.ilyas.tsi@gmail.com> Co-authored-by: Ilyas Moutawwakil <57442720+IlyasMoutawwakil@users.noreply.github.com>
huggingface · Sep 4, 2024 · 8e57994 · 8e57994
1 parent 6dfec54
commit 8e57994
Show file tree

Hide file tree

Showing 11 changed files with 179 additions and 34 deletions.
diff --git a/.github/workflows/test_cli_cuda_pytorch_single_gpu.yaml b/.github/workflows/test_cli_cuda_pytorch_single_gpu.yaml
@@ -49,4 +49,4 @@ jobs:
             --workdir /workspace
           run: |
             pip install -e .[testing,diffusers,timm,peft,bitsandbytes,autoawq,auto-gptq]
-            pytest -x -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed) and not (awq)"
+            pytest -x -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed)"
diff --git a/.github/workflows/test_cli_rocm_pytorch_multi_gpu.yaml b/.github/workflows/test_cli_rocm_pytorch_multi_gpu.yaml
@@ -49,8 +49,5 @@ jobs:
           pip install -e .[testing,diffusers,timm,deepspeed,peft,autoawq,auto-gptq] "deepspeed<0.15"
 
       - name: Run tests
-        env:
-          HF_TOKEN: ${{ secrets.HF_TOKEN }}
-          PUSH_REPO_ID: optimum-benchmark/rocm
         run: |
-          pytest -x -s -k "cli and cuda and pytorch and (dp or ddp or device_map or deepspeed) and not (bnb or awq)"
+          pytest -x -s -k "cli and cuda and pytorch and (dp or ddp or device_map or deepspeed) and not bnb"
diff --git a/.github/workflows/test_cli_rocm_pytorch_single_gpu.yaml b/.github/workflows/test_cli_rocm_pytorch_single_gpu.yaml
@@ -49,8 +49,5 @@ jobs:
           pip install -e .[testing,diffusers,timm,peft,autoawq,auto-gptq]
 
       - name: Run tests
-        env:
-          HF_TOKEN: ${{ secrets.HF_TOKEN }}
-          PUSH_REPO_ID: optimum-benchmark/rocm
         run: |
-          pytest -x -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed) and not (bnb or awq or gptq)"
+          pytest -x -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed) and not bnb"
diff --git a/.gitignore b/.gitignore
@@ -173,6 +173,6 @@ experiments/
 amdsmi/
 amd-*
 
-# Mac specific
+external_repos/
 .DS_Store
-outputs/
+outputs/
diff --git a/Makefile b/Makefile
@@ -19,19 +19,19 @@ install:
 ## Build docker
 
 build_cpu_image:
-	docker build -t optimum-benchmark:latest-cpu docker/cpu
+	docker build -t optimum-benchmark:latest-cpu -f docker/cpu/Dockerfile .
 	docker build --build-arg IMAGE=optimum-benchmark:latest-cpu --build-arg USER_ID=$(USER_ID) --build-arg GROUP_ID=$(GROUP_ID) -t optimum-benchmark:latest-cpu docker/unroot
 
 build_cuda_image:
-	docker build -t optimum-benchmark:latest-cuda docker/cuda
+	docker build -t optimum-benchmark:latest-cuda -f docker/cuda/Dockerfile .
 	docker build --build-arg IMAGE=optimum-benchmark:latest-cuda --build-arg USER_ID=$(USER_ID) --build-arg GROUP_ID=$(GROUP_ID) -t optimum-benchmark:latest-cuda docker/unroot
 
 build_cuda_ort_image:
-	docker build -t optimum-benchmark:latest-cuda-ort docker/cuda-ort
+	docker build -t optimum-benchmark:latest-cuda-ort -f docker/cuda-ort/Dockerfile .
 	docker build --build-arg IMAGE=optimum-benchmark:latest-cuda-ort --build-arg USER_ID=$(USER_ID) --build-arg GROUP_ID=$(GROUP_ID) -t optimum-benchmark:latest-cuda-ort docker/unroot
 
 build_rocm_image:
-	docker build -t optimum-benchmark:latest-rocm docker/rocm
+	docker build -t optimum-benchmark:latest-rocm -f docker/rocm/Dockerfile .
 	docker build --build-arg IMAGE=optimum-benchmark:latest-rocm --build-arg USER_ID=$(USER_ID) --build-arg GROUP_ID=$(GROUP_ID) -t optimum-benchmark:latest-rocm docker/unroot
 
 # Run docker
@@ -111,7 +111,7 @@ install_cli_cuda_pytorch:
 	pip install -e .[testing,timm,diffusers,peft,autoawq,auto-gptq,bitsandbytes,deepspeed]
 
 install_cli_rocm_pytorch:
-	pip install -e .[testing,timm,diffusers,peft,autoawq,auto-gptq,deepspeed]
+	pip install -e .[testing,timm,diffusers,peft,autoawq,auto-gptq] "deepspeed<0.15"
 
 install_cli_cuda_torch_ort:
 	pip install -e .[testing,timm,diffusers,peft,torch-ort,deepspeed]
@@ -167,10 +167,10 @@ test_cli_cuda_torch_ort_single_gpu:
 	pytest -s -k "cli and cuda and torch-ort and not (dp or ddp or device_map or deepspeed) and not peft"
 
 test_cli_rocm_pytorch_multi_gpu:
-	pytest -s -k "cli and rocm and pytorch and (dp or ddp or device_map or deepspeed) and not (bnb or awq)"
+	pytest -s -k "cli and cuda and pytorch and (dp or ddp or device_map or deepspeed) and not bnb"
 
 test_cli_rocm_pytorch_single_gpu:
-	pytest -s -k "cli and rocm and pytorch and not (dp or ddp or device_map or deepspeed) and not (bnb or awq)"
+	pytest -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed) and not bnb"
 
 test_cli_llama_cpp:
 	pytest -s -k "llama_cpp"

diff --git a/docker/cuda-ort/Dockerfile b/docker/cuda-ort/Dockerfile
@@ -39,5 +39,5 @@ else \
     pip install --no-cache-dir torch==${TORCH_VERSION} torchvision torchaudio --index-url https://download.pytorch.org/whl/${TORCH_CUDA} ; \
 fi
 
-ENV TORCH_CUDA_ARCH_LIST="5.2 6.0 6.1 7.0 7.5 8.0 8.6+PTX"
+ENV TORCH_CUDA_ARCH_LIST="5.0 6.0 7.0 7.5 8.0 8.6 9.0+PTX"
 RUN pip install --no-cache-dir torch-ort onnxruntime-training && python -m torch_ort.configure
diff --git a/docker/cuda/Dockerfile b/docker/cuda/Dockerfile
@@ -18,13 +18,13 @@ ARG UBUNTU_VERSION=22.04
 FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu${UBUNTU_VERSION}
 
 # Install necessary packages
-ENV DEBIAN_FRONTEND noninteractive
+ENV DEBIAN_FRONTEND=noninteractive
 RUN apt-get update && apt-get install -y --no-install-recommends \
     sudo build-essential git bash-completion \
     python3.10 python3-pip python3.10-dev && \
     apt-get clean && rm -rf /var/lib/apt/lists/* && \
     update-alternatives --install /usr/bin/python python /usr/bin/python3.10 1 && \
-    pip install --no-cache-dir --upgrade pip setuptools wheel 
+    pip install --no-cache-dir --upgrade pip setuptools wheel requests
 
 # Install PyTorch
 ARG TORCH_CUDA=cu124
@@ -36,4 +36,11 @@ elif [ "${TORCH_VERSION}" = "nighly" ]; then \
     pip install --no-cache-dir --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/${TORCH_CUDA} ; \
 else \
     pip install --no-cache-dir torch==${TORCH_VERSION} torchvision torchaudio --index-url https://download.pytorch.org/whl/${TORCH_CUDA} ; \
-fi
+fi
+
+# Install quantization libraries from source.
+# The ENV values below are visible to the build step that follows
+# (presumably read by the AutoGPTQ/AutoAWQ source builds; confirm).
+ENV CUDA_VERSION=12.4
+ENV TORCH_CUDA_ARCH_LIST="6.0 7.0 7.5 8.0 8.6 9.0+PTX"
+
+# Run the script by the same absolute path it is copied to, so this step
+# does not depend on the image's working directory being `/`.
+COPY scripts/install_quantization_libs.py /internal/install_quantization_libs.py
+RUN python /internal/install_quantization_libs.py --install-autogptq-from-source --install-autoawq-from-source
diff --git a/docker/rocm/Dockerfile b/docker/rocm/Dockerfile
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-ARG ROCM_VERSION=6.1.2
+ARG ROCM_VERSION=5.7.1
 ARG UBUNTU_VERSION=22.04
 
 FROM rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}
@@ -21,15 +21,16 @@ FROM rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}
 ENV PATH="/opt/rocm/bin:${PATH}"
 ENV DEBIAN_FRONTEND=noninteractive
 RUN apt-get update && apt-get upgrade -y && apt-get install -y --no-install-recommends \ 
+    rocsparse-dev hipsparse-dev rocthrust-dev rocblas-dev hipblas-dev \
     sudo build-essential git bash-completion \
     python3.10 python3-pip python3.10-dev && \
     apt-get clean && rm -rf /var/lib/apt/lists/* && \
     update-alternatives --install /usr/bin/python python /usr/bin/python3.10 1 && \
-    pip install --no-cache-dir --upgrade pip setuptools wheel  && \
+    pip install --no-cache-dir --upgrade pip setuptools wheel requests && \
     cd /opt/rocm/share/amd_smi && pip install .
 
 # Install PyTorch
-ARG TORCH_ROCM=rocm6.1
+ARG TORCH_ROCM=rocm5.7
 ARG TORCH_VERSION=stable
 
 RUN if [ "${TORCH_VERSION}" = "stable" ]; then \
@@ -39,3 +40,10 @@ elif [ "${TORCH_VERSION}" = "nightly" ]; then \
 else \
     pip install --no-cache-dir torch==${TORCH_VERSION} torchvision torchaudio --index-url https://download.pytorch.org/whl/${TORCH_ROCM} ; \
 fi
+
+# Install quantization libraries from source.
+# The ENV values below are visible to the build step that follows
+# (presumably read by the AutoGPTQ/AutoAWQ source builds; confirm).
+ENV ROCM_VERSION=5.7
+ENV PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx1030;gfx1100"
+
+# Run the script by the same absolute path it is copied to, so this step
+# does not depend on the image's working directory being `/`.
+COPY scripts/install_quantization_libs.py /internal/install_quantization_libs.py
+RUN python /internal/install_quantization_libs.py --install-autogptq-from-source --install-autoawq-from-source
diff --git a/optimum_benchmark/backends/pytorch/backend.py b/optimum_benchmark/backends/pytorch/backend.py
@@ -286,11 +286,35 @@ def create_no_weights_model(self) -> None:
     def process_quantization_config(self) -> None:
         if self.is_gptq_quantized:
             self.logger.info("\t+ Processing GPTQ config")
+
+            # Fail early with an actionable message when the compiled kernel
+            # extension cannot be imported.
+            try:
+                import exllamav2_kernels  # noqa: F401
+            except ImportError:
+                # Each fragment ends with a space so the concatenated message
+                # reads as separate sentences/clauses.
+                raise ImportError(
+                    "Tried to import `exllamav2_kernels` but failed. "
+                    "This means that the AutoGPTQ package is either not installed or not compiled with the right torch version. "
+                    "Please install it from source following the instructions at `https://github.com/AutoGPTQ/AutoGPTQ` "
+                    "or use `python scripts/install_quantization_libs.py --install-autogptq-from-source` in "
+                    "`optimum-benchmark` repository at `https://github.com/huggingface/optimum-benchmark`."
+                )
+
             self.quantization_config = GPTQConfig(
                 **dict(getattr(self.pretrained_config, "quantization_config", {}), **self.config.quantization_config)
             )
         elif self.is_awq_quantized:
             self.logger.info("\t+ Processing AWQ config")
+
+            # Fail early with an actionable message when the compiled kernel
+            # extension cannot be imported.
+            try:
+                import exlv2_ext  # noqa: F401
+            except ImportError:
+                # Each fragment ends with a space so the concatenated message
+                # reads as separate sentences/clauses.
+                raise ImportError(
+                    "Tried to import `exlv2_ext` but failed. "
+                    "This means that the AutoAWQ package is either not installed or not compiled with the right torch version. "
+                    "Please install it from source following the instructions at `https://github.com/casper-hansen/AutoAWQ` "
+                    "or use `python scripts/install_quantization_libs.py --install-autoawq-from-source` in "
+                    "`optimum-benchmark` repository at `https://github.com/huggingface/optimum-benchmark`."
+                )
+
             self.quantization_config = AwqConfig(
                 **dict(getattr(self.pretrained_config, "quantization_config", {}), **self.config.quantization_config)
             )

diff --git a/scripts/install_quantization_libs.py b/scripts/install_quantization_libs.py
@@ -0,0 +1,118 @@
+import argparse
+import os
+import re
+import subprocess
+import sys
+
+EXTERNAL_REPOS_DIR = "external_repos"
+
+
+def process_setup_file(setup_file_path):
+    """Patch a cloned package's ``setup.py`` in place before it is built.
+
+    Two textual rewrites are applied to the file at ``setup_file_path``:
+
+    * every double-quoted ``"torch==<version>",`` entry is deleted (only the
+      quoted token and its trailing comma, not the rest of the line);
+    * the ``IS_CPU_ONLY`` auto-detection assignment is replaced by
+      ``IS_CPU_ONLY = False``.
+
+    Text matching neither pattern is written back unchanged.
+    """
+    torch_pin = re.compile(r'"torch==[^\"]+",')
+    cpu_only_probe = "IS_CPU_ONLY = not torch.backends.mps.is_available() and not torch.cuda.is_available()"
+
+    with open(setup_file_path, "r") as source:
+        original = source.read()
+
+    patched = torch_pin.sub("", original).replace(cpu_only_probe, "IS_CPU_ONLY = False")
+
+    with open(setup_file_path, "w") as target:
+        target.write(patched)
+
+
+def clone_or_pull_repo(repo_url, repo_location_path):
+    """Clone ``repo_url`` into ``repo_location_path``, or update an existing checkout.
+
+    If ``repo_location_path`` already exists, ``git pull`` is run inside it;
+    otherwise the repository is cloned there. Git is invoked with an argument
+    list (no shell), so URLs and paths containing spaces or shell
+    metacharacters are passed through verbatim.
+
+    Raises:
+        subprocess.CalledProcessError: if the git command exits non-zero.
+    """
+    if os.path.exists(repo_location_path):
+        print(f"Directory {repo_location_path} already exists. Pulling the latest changes.")
+        subprocess.run(["git", "pull"], cwd=repo_location_path, check=True)
+    else:
+        # basename of the normalized path copes with any OS separator and a trailing slash
+        repo_name = os.path.basename(os.path.normpath(repo_location_path))
+        print(f"Cloning {repo_name} into {repo_location_path}")
+        subprocess.run(["git", "clone", repo_url, repo_location_path], check=True)
+
+
+def install_autoawq_from_source():
+    """Install the AutoAWQ_kernels and AutoAWQ packages from their GitHub sources.
+
+    For each repository, in that order: clone (or pull) it under
+    ``EXTERNAL_REPOS_DIR``, patch its ``setup.py`` via ``process_setup_file``,
+    then run ``pip install .`` in the checkout with the interpreter that is
+    executing this script.
+
+    Raises:
+        subprocess.CalledProcessError: if a git or pip command exits non-zero.
+    """
+    print("Installing AutoAWQ and AutoAWQ_kernels packages.")
+
+    # AutoAWQ_kernels is installed before AutoAWQ.
+    for repo_name in ("AutoAWQ_kernels", "AutoAWQ"):
+        repo_path = os.path.join(EXTERNAL_REPOS_DIR, repo_name)
+        clone_or_pull_repo(f"https://github.com/casper-hansen/{repo_name}", repo_path)
+        process_setup_file(os.path.join(repo_path, "setup.py"))
+        # Argument list + cwd instead of a `cd ... &&` shell string, so an
+        # interpreter or checkout path containing spaces cannot break the command.
+        subprocess.run(
+            [sys.executable, "-m", "pip", "install", "."],
+            cwd=repo_path,
+            check=True,
+            env=os.environ,
+        )
+
+    print("AutoAWQ and AutoAWQ_kernels packages installed.")
+
+
+def install_autogptq_from_source():
+    """Install the AutoGPTQ package from its GitHub source.
+
+    Steps: clone (or pull) the repository under ``EXTERNAL_REPOS_DIR``,
+    install ``numpy``, ``gekko`` and ``pandas``, patch ``setup.py`` via
+    ``process_setup_file``, then run ``pip install .`` in the checkout.
+
+    Raises:
+        subprocess.CalledProcessError: if a git or pip command exits non-zero.
+    """
+    print("Installing AutoGPTQ package.")
+    autogptq_repo_path = os.path.join(EXTERNAL_REPOS_DIR, "AutoGPTQ")
+
+    clone_or_pull_repo("https://github.com/PanQiWei/AutoGPTQ.git", autogptq_repo_path)
+    # Use `sys.executable -m pip` (not whatever `pip` is first on PATH) so these
+    # packages land in the same environment as the AutoGPTQ install below.
+    subprocess.run(
+        [sys.executable, "-m", "pip", "install", "numpy", "gekko", "pandas"],
+        check=True,
+        env=os.environ,
+    )
+    process_setup_file(os.path.join(autogptq_repo_path, "setup.py"))
+    # Argument list + cwd instead of a `cd ... &&` shell string, so paths with
+    # spaces cannot break the command.
+    subprocess.run(
+        [sys.executable, "-m", "pip", "install", "."],
+        cwd=autogptq_repo_path,
+        check=True,
+        env=os.environ,
+    )
+
+    print("AutoGPTQ package installed.")
+
+
+def main():
+    """Command-line entry point: install the requested quantization packages.
+
+    Recognizes two boolean flags, ``--install-autoawq-from-source`` and
+    ``--install-autogptq-from-source``. AutoAWQ is handled before AutoGPTQ
+    when both are given. If neither is given, a hint is printed and the
+    process exits with status 1.
+    """
+    parser = argparse.ArgumentParser(description="Install AutoAWQ or AutoGPTQ from source.")
+    flag_help = (
+        ("--install-autoawq-from-source", "Install AutoAWQ and AutoAWQ_kernels packages from source."),
+        ("--install-autogptq-from-source", "Install AutoGPTQ package from source."),
+    )
+    for flag, description in flag_help:
+        parser.add_argument(flag, action="store_true", help=description)
+
+    options = parser.parse_args()
+    want_awq = options.install_autoawq_from_source
+    want_gptq = options.install_autogptq_from_source
+
+    # Guard clause: nothing requested means nothing to do.
+    if not (want_awq or want_gptq):
+        print(
+            "Please specify an installation option. Use --install-autoawq-from-source or --install-autogptq-from-source."
+        )
+        sys.exit(1)
+
+    if want_awq:
+        install_autoawq_from_source()
+    if want_gptq:
+        install_autogptq_from_source()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/setup.py b/setup.py
@@ -58,12 +58,6 @@
             "Please install amdsmi from https://github.com/ROCm/amdsmi to enable this feature."
         )
 
-if USE_ROCM:
-    AUTOAWQ = "autoawq@https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.1/autoawq-0.2.1+rocm571-cp310-cp310-linux_x86_64.whl"
-    AUTOGPTQ = "auto-gptq@https://huggingface.github.io/autogptq-index/whl/rocm573/auto-gptq/auto_gptq-0.7.1%2Brocm5.7.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl"
-else:
-    AUTOAWQ = "autoawq==0.2.1"
-    AUTOGPTQ = "auto-gptq==0.7.1"
 
 EXTRAS_REQUIRE = {
     "quality": ["ruff"],
@@ -81,8 +75,8 @@
     "py-txi": ["py-txi"],
     "vllm": ["vllm"],
     # optional dependencies
-    "autoawq": [AUTOAWQ],
-    "auto-gptq": ["optimum", AUTOGPTQ],
+    "autoawq": ["autoawq"],
+    "auto-gptq": ["optimum", "auto-gptq"],
     "sentence-transformers": ["sentence-transformers"],
     "bitsandbytes": ["bitsandbytes"],
     "codecarbon": ["codecarbon"],
@@ -114,7 +108,7 @@
         "License :: OSI Approved :: Apache Software License",
         "Topic :: Scientific/Engineering :: Artificial Intelligence",
     ],
-    keywords="benchmaek, transformers, quantization, pruning, optimization, training, inference, onnx, onnx runtime, intel, "
+    keywords="benchmark, transformers, quantization, pruning, optimization, training, inference, onnx, onnx runtime, intel, "
     "habana, graphcore, neural compressor, ipex, ipu, hpu, llm-swarm, py-txi, vllm, llama-cpp, auto-gptq, autoawq, "
     "sentence-transformers, bitsandbytes, codecarbon, flash-attn, deepspeed, diffusers, timm, peft",
     long_description=open("README.md", "r", encoding="utf-8").read(),