From 8e57994b5ce38f2b6b651d101ec502f7ee7fbb9a Mon Sep 17 00:00:00 2001 From: Baptiste Colle <32412211+baptistecolle@users.noreply.github.com> Date: Wed, 4 Sep 2024 16:07:15 +0200 Subject: [PATCH] Build from source quantization packages (#239) Co-authored-by: IlyasMoutawwakil Co-authored-by: Ilyas Moutawwakil <57442720+IlyasMoutawwakil@users.noreply.github.com> --- .../test_cli_cuda_pytorch_single_gpu.yaml | 2 +- .../test_cli_rocm_pytorch_multi_gpu.yaml | 5 +- .../test_cli_rocm_pytorch_single_gpu.yaml | 5 +- .gitignore | 4 +- Makefile | 14 +-- docker/cuda-ort/Dockerfile | 2 +- docker/cuda/Dockerfile | 13 +- docker/rocm/Dockerfile | 14 ++- optimum_benchmark/backends/pytorch/backend.py | 24 ++++ scripts/install_quantization_libs.py | 118 ++++++++++++++++++ setup.py | 12 +- 11 files changed, 179 insertions(+), 34 deletions(-) create mode 100644 scripts/install_quantization_libs.py diff --git a/.github/workflows/test_cli_cuda_pytorch_single_gpu.yaml b/.github/workflows/test_cli_cuda_pytorch_single_gpu.yaml index 0a404bd0..3ba94abc 100644 --- a/.github/workflows/test_cli_cuda_pytorch_single_gpu.yaml +++ b/.github/workflows/test_cli_cuda_pytorch_single_gpu.yaml @@ -49,4 +49,4 @@ jobs: --workdir /workspace run: | pip install -e .[testing,diffusers,timm,peft,bitsandbytes,autoawq,auto-gptq] - pytest -x -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed) and not (awq)" + pytest -x -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed)" diff --git a/.github/workflows/test_cli_rocm_pytorch_multi_gpu.yaml b/.github/workflows/test_cli_rocm_pytorch_multi_gpu.yaml index 9b8ba321..050b53e1 100644 --- a/.github/workflows/test_cli_rocm_pytorch_multi_gpu.yaml +++ b/.github/workflows/test_cli_rocm_pytorch_multi_gpu.yaml @@ -49,8 +49,5 @@ jobs: pip install -e .[testing,diffusers,timm,deepspeed,peft,autoawq,auto-gptq] "deepspeed<0.15" - name: Run tests - env: - HF_TOKEN: ${{ secrets.HF_TOKEN }} - PUSH_REPO_ID: optimum-benchmark/rocm run: | - pytest -x -s -k "cli and cuda and pytorch and (dp or ddp or device_map or deepspeed) and not (bnb or awq)" + pytest -x -s -k "cli and cuda and pytorch and (dp or ddp or device_map or deepspeed) and not bnb" diff --git a/.github/workflows/test_cli_rocm_pytorch_single_gpu.yaml b/.github/workflows/test_cli_rocm_pytorch_single_gpu.yaml index 3f19ad11..59fdd4f6 100644 --- a/.github/workflows/test_cli_rocm_pytorch_single_gpu.yaml +++ b/.github/workflows/test_cli_rocm_pytorch_single_gpu.yaml @@ -49,8 +49,5 @@ jobs: pip install -e .[testing,diffusers,timm,peft,autoawq,auto-gptq] - name: Run tests - env: - HF_TOKEN: ${{ secrets.HF_TOKEN }} - PUSH_REPO_ID: optimum-benchmark/rocm run: | - pytest -x -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed) and not (bnb or awq or gptq)" + pytest -x -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed) and not bnb" diff --git a/.gitignore b/.gitignore index f26fda31..62416b0c 100644 --- a/.gitignore +++ b/.gitignore @@ -173,6 +173,6 @@ experiments/ amdsmi/ amd-* -# Mac specific +external_repos/ .DS_Store -outputs/ \ No newline at end of file +outputs/ diff --git a/Makefile b/Makefile index 8883c3cc..0e14a5db 100644 --- a/Makefile +++ b/Makefile @@ -19,19 +19,19 @@ install: ## Build docker build_cpu_image: - docker build -t optimum-benchmark:latest-cpu docker/cpu + docker build -t optimum-benchmark:latest-cpu -f docker/cpu/Dockerfile . docker build --build-arg IMAGE=optimum-benchmark:latest-cpu --build-arg USER_ID=$(USER_ID) --build-arg GROUP_ID=$(GROUP_ID) -t optimum-benchmark:latest-cpu docker/unroot build_cuda_image: - docker build -t optimum-benchmark:latest-cuda docker/cuda + docker build -t optimum-benchmark:latest-cuda -f docker/cuda/Dockerfile . docker build --build-arg IMAGE=optimum-benchmark:latest-cuda --build-arg USER_ID=$(USER_ID) --build-arg GROUP_ID=$(GROUP_ID) -t optimum-benchmark:latest-cuda docker/unroot build_cuda_ort_image: - docker build -t optimum-benchmark:latest-cuda-ort docker/cuda-ort + docker build -t optimum-benchmark:latest-cuda-ort -f docker/cuda-ort/Dockerfile . docker build --build-arg IMAGE=optimum-benchmark:latest-cuda-ort --build-arg USER_ID=$(USER_ID) --build-arg GROUP_ID=$(GROUP_ID) -t optimum-benchmark:latest-cuda-ort docker/unroot build_rocm_image: - docker build -t optimum-benchmark:latest-rocm docker/rocm + docker build -t optimum-benchmark:latest-rocm -f docker/rocm/Dockerfile . docker build --build-arg IMAGE=optimum-benchmark:latest-rocm --build-arg USER_ID=$(USER_ID) --build-arg GROUP_ID=$(GROUP_ID) -t optimum-benchmark:latest-rocm docker/unroot # Run docker @@ -111,7 +111,7 @@ install_cli_cuda_pytorch: pip install -e .[testing,timm,diffusers,peft,autoawq,auto-gptq,bitsandbytes,deepspeed] install_cli_rocm_pytorch: - pip install -e .[testing,timm,diffusers,peft,autoawq,auto-gptq,deepspeed] + pip install -e .[testing,timm,diffusers,peft,autoawq,auto-gptq] "deepspeed<0.15" install_cli_cuda_torch_ort: pip install -e .[testing,timm,diffusers,peft,torch-ort,deepspeed] @@ -167,10 +167,10 @@ test_cli_cuda_torch_ort_single_gpu: pytest -s -k "cli and cuda and torch-ort and not (dp or ddp or device_map or deepspeed) and not peft" test_cli_rocm_pytorch_multi_gpu: - pytest -s -k "cli and rocm and pytorch and (dp or ddp or device_map or deepspeed) and not (bnb or awq)" + pytest -s -k "cli and cuda and pytorch and (dp or ddp or device_map or deepspeed) and not bnb" test_cli_rocm_pytorch_single_gpu: - pytest -s -k "cli and rocm and pytorch and not (dp or ddp or device_map or deepspeed) and not (bnb or awq)" + pytest -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed) and not bnb" test_cli_llama_cpp: pytest -s -k "llama_cpp" diff --git a/docker/cuda-ort/Dockerfile b/docker/cuda-ort/Dockerfile index 056d3ebf..9b94fae7 100644 --- a/docker/cuda-ort/Dockerfile +++ b/docker/cuda-ort/Dockerfile @@ -39,5 +39,5 @@ else \ pip install --no-cache-dir torch==${TORCH_VERSION} torchvision torchaudio --index-url https://download.pytorch.org/whl/${TORCH_CUDA} ; \ fi -ENV TORCH_CUDA_ARCH_LIST="5.2 6.0 6.1 7.0 7.5 8.0 8.6+PTX" +ENV TORCH_CUDA_ARCH_LIST="5.0 6.0 7.0 7.5 8.0 8.6 9.0+PTX" RUN pip install --no-cache-dir torch-ort onnxruntime-training && python -m torch_ort.configure diff --git a/docker/cuda/Dockerfile b/docker/cuda/Dockerfile index 61425635..fa43fa49 100644 --- a/docker/cuda/Dockerfile +++ b/docker/cuda/Dockerfile @@ -18,13 +18,13 @@ ARG UBUNTU_VERSION=22.04 FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu${UBUNTU_VERSION} # Install necessary packages -ENV DEBIAN_FRONTEND noninteractive +ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update && apt-get install -y --no-install-recommends \ sudo build-essential git bash-completion \ python3.10 python3-pip python3.10-dev && \ apt-get clean && rm -rf /var/lib/apt/lists/* && \ update-alternatives --install /usr/bin/python python /usr/bin/python3.10 1 && \ - pip install --no-cache-dir --upgrade pip setuptools wheel + pip install --no-cache-dir --upgrade pip setuptools wheel requests # Install PyTorch ARG TORCH_CUDA=cu124 @@ -36,4 +36,11 @@ elif [ "${TORCH_VERSION}" = "nighly" ]; then \ pip install --no-cache-dir --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/${TORCH_CUDA} ; \ else \ pip install --no-cache-dir torch==${TORCH_VERSION} torchvision torchaudio --index-url https://download.pytorch.org/whl/${TORCH_CUDA} ; \ -fi \ No newline at end of file +fi + +# Install quantization libraries from source +ENV CUDA_VERSION=12.4 +ENV TORCH_CUDA_ARCH_LIST="6.0 7.0 7.5 8.0 8.6 9.0+PTX" + +COPY scripts/install_quantization_libs.py /internal/install_quantization_libs.py +RUN python internal/install_quantization_libs.py --install-autogptq-from-source --install-autoawq-from-source diff --git a/docker/rocm/Dockerfile b/docker/rocm/Dockerfile index 7dfd3719..611d058a 100644 --- a/docker/rocm/Dockerfile +++ b/docker/rocm/Dockerfile @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -ARG ROCM_VERSION=6.1.2 +ARG ROCM_VERSION=5.7.1 ARG UBUNTU_VERSION=22.04 FROM rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION} @@ -21,15 +21,16 @@ FROM rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION} ENV PATH="/opt/rocm/bin:${PATH}" ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update && apt-get upgrade -y && apt-get install -y --no-install-recommends \ + rocsparse-dev hipsparse-dev rocthrust-dev rocblas-dev hipblas-dev \ sudo build-essential git bash-completion \ python3.10 python3-pip python3.10-dev && \ apt-get clean && rm -rf /var/lib/apt/lists/* && \ update-alternatives --install /usr/bin/python python /usr/bin/python3.10 1 && \ - pip install --no-cache-dir --upgrade pip setuptools wheel && \ + pip install --no-cache-dir --upgrade pip setuptools wheel requests && \ cd /opt/rocm/share/amd_smi && pip install . # Install PyTorch -ARG TORCH_ROCM=rocm6.1 +ARG TORCH_ROCM=rocm5.7 ARG TORCH_VERSION=stable RUN if [ "${TORCH_VERSION}" = "stable" ]; then \ @@ -39,3 +40,10 @@ elif [ "${TORCH_VERSION}" = "nightly" ]; then \ else \ pip install --no-cache-dir torch==${TORCH_VERSION} torchvision torchaudio --index-url https://download.pytorch.org/whl/${TORCH_ROCM} ; \ fi + +# Install quantization libraries from source +ENV ROCM_VERSION=5.7 +ENV PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx1030;gfx1100" + +COPY scripts/install_quantization_libs.py /internal/install_quantization_libs.py +RUN python internal/install_quantization_libs.py --install-autogptq-from-source --install-autoawq-from-source \ No newline at end of file diff --git a/optimum_benchmark/backends/pytorch/backend.py b/optimum_benchmark/backends/pytorch/backend.py index bb747223..fcf522b5 100644 --- a/optimum_benchmark/backends/pytorch/backend.py +++ b/optimum_benchmark/backends/pytorch/backend.py @@ -286,11 +286,35 @@ def create_no_weights_model(self) -> None: def process_quantization_config(self) -> None: if self.is_gptq_quantized: self.logger.info("\t+ Processing GPTQ config") + + try: + import exllamav2_kernels # noqa: F401 + except ImportError: + raise ImportError( + "Tried to import `exllamav2_kernels` but failed. " + "This means that the AutoGPTQ package is either not installed or not compiled with the right torch version. " + "Please install it from source following the instructions at `https://github.com/AutoGPTQ/AutoGPTQ`" + "Or use `python scripts/install_quantization_libs.py --install-autogptq-from-source` in " + "`optimum-benchmark` repository at `https://github.com/huggingface/optimum-benchmark`." + ) + self.quantization_config = GPTQConfig( **dict(getattr(self.pretrained_config, "quantization_config", {}), **self.config.quantization_config) ) elif self.is_awq_quantized: self.logger.info("\t+ Processing AWQ config") + + try: + import exlv2_ext # noqa: F401 + except ImportError: + raise ImportError( + "Tried to import `exlv2_ext` but failed. " + "This means that the AutoAWQ package is either not installed or not compiled with the right torch version. " + "Please install it from source following the instructions at `https://github.com/casper-hansen/AutoAWQ`" + "Or use `python scripts/install_quantization_libs.py --install-autoawq-from-source` in " + "`optimum-benchmark` repository at `https://github.com/huggingface/optimum-benchmark`." + ) + self.quantization_config = AwqConfig( **dict(getattr(self.pretrained_config, "quantization_config", {}), **self.config.quantization_config) ) diff --git a/scripts/install_quantization_libs.py b/scripts/install_quantization_libs.py new file mode 100644 index 00000000..9feebbc1 --- /dev/null +++ b/scripts/install_quantization_libs.py @@ -0,0 +1,118 @@ +import argparse +import os +import re +import subprocess +import sys + +EXTERNAL_REPOS_DIR = "external_repos" + + +def process_setup_file(setup_file_path): + with open(setup_file_path, "r") as file: + setup_content = file.read() + + # Use a regular expression to remove any line containing "torch==" + setup_content = re.sub(r'"torch==[^\"]+",', "", setup_content) + + # Set IS_CPU_ONLY to False + setup_content = setup_content.replace( + "IS_CPU_ONLY = not torch.backends.mps.is_available() and not torch.cuda.is_available()", "IS_CPU_ONLY = False" + ) + + # Write the modified content back to setup.py + with open(setup_file_path, "w") as file: + file.write(setup_content) + + +def clone_or_pull_repo(repo_url, repo_location_path): + """Clone the repo if it doesn't exist; otherwise, pull the latest changes.""" + if os.path.exists(repo_location_path): + print(f"Directory {repo_location_path} already exists. Pulling the latest changes.") + subprocess.run(f"cd {repo_location_path} && git pull", shell=True, check=True) + else: + repo_name = repo_location_path.split("/")[-1] + print(f"Cloning {repo_name} into {repo_location_path}") + subprocess.run(f"git clone {repo_url} {repo_location_path}", shell=True, check=True) + + +def install_autoawq_from_source(): + """Install the AutoAWQ and AutoAWQ_kernels packages from GitHub.""" + print("Installing AutoAWQ and AutoAWQ_kernels packages.") + + autoawq_repo_name = "AutoAWQ" + autoawq_kernels_repo_name = "AutoAWQ_kernels" + + autoawq_repo_path = os.path.join(EXTERNAL_REPOS_DIR, autoawq_repo_name) + kernels_repo_path = os.path.join(EXTERNAL_REPOS_DIR, autoawq_kernels_repo_name) + + clone_or_pull_repo(f"https://github.com/casper-hansen/{autoawq_kernels_repo_name}", kernels_repo_path) + kernels_setup_file_path = os.path.join(kernels_repo_path, "setup.py") + process_setup_file(kernels_setup_file_path) + subprocess.run( + f"cd {kernels_repo_path} && {sys.executable} -m pip install .", + shell=True, + check=True, + env=os.environ, + ) + + clone_or_pull_repo(f"https://github.com/casper-hansen/{autoawq_repo_name}", autoawq_repo_path) + autoawq_setup_file_path = os.path.join(autoawq_repo_path, "setup.py") + process_setup_file(autoawq_setup_file_path) + subprocess.run( + f"cd {autoawq_repo_path} && {sys.executable} -m pip install .", + shell=True, + check=True, + env=os.environ, + ) + + print("AutoAWQ and AutoAWQ_kernels packages installed.") + + +def install_autogptq_from_source(): + """Install the AutoGPTQ package from GitHub.""" + print("Installing AutoGPTQ package.") + autogptq_repo_path = os.path.join(EXTERNAL_REPOS_DIR, "AutoGPTQ") + + clone_or_pull_repo("https://github.com/PanQiWei/AutoGPTQ.git", autogptq_repo_path) + subprocess.run("pip install numpy gekko pandas", shell=True, check=True, env=os.environ) + autogptq_setup_file_path = os.path.join(autogptq_repo_path, "setup.py") + process_setup_file(autogptq_setup_file_path) + subprocess.run( + f"cd {autogptq_repo_path} && {sys.executable} -m pip install .", + shell=True, + check=True, + env=os.environ, + ) + + print("AutoGPTQ package installed.") + + +def main(): + parser = argparse.ArgumentParser(description="Install AutoAWQ or AutoGPTQ from source.") + parser.add_argument( + "--install-autoawq-from-source", + action="store_true", + help="Install AutoAWQ and AutoAWQ_kernels packages from source.", + ) + parser.add_argument( + "--install-autogptq-from-source", + action="store_true", + help="Install AutoGPTQ package from source.", + ) + + args = parser.parse_args() + + if args.install_autoawq_from_source: + install_autoawq_from_source() + if args.install_autogptq_from_source: + install_autogptq_from_source() + + if not args.install_autoawq_from_source and not args.install_autogptq_from_source: + print( + "Please specify an installation option. Use --install-autoawq-from-source or --install-autogptq-from-source." + ) + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/setup.py b/setup.py index 5e401c21..cff0d197 100644 --- a/setup.py +++ b/setup.py @@ -58,12 +58,6 @@ "Please install amdsmi from https://github.com/ROCm/amdsmi to enable this feature." ) -if USE_ROCM: - AUTOAWQ = "autoawq@https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.1/autoawq-0.2.1+rocm571-cp310-cp310-linux_x86_64.whl" - AUTOGPTQ = "auto-gptq@https://huggingface.github.io/autogptq-index/whl/rocm573/auto-gptq/auto_gptq-0.7.1%2Brocm5.7.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl" -else: - AUTOAWQ = "autoawq==0.2.1" - AUTOGPTQ = "auto-gptq==0.7.1" EXTRAS_REQUIRE = { "quality": ["ruff"], @@ -81,8 +75,8 @@ "py-txi": ["py-txi"], "vllm": ["vllm"], # optional dependencies - "autoawq": [AUTOAWQ], - "auto-gptq": ["optimum", AUTOGPTQ], + "autoawq": ["autoawq"], + "auto-gptq": ["optimum", "auto-gptq"], "sentence-transformers": ["sentence-transformers"], "bitsandbytes": ["bitsandbytes"], "codecarbon": ["codecarbon"], @@ -114,7 +108,7 @@ "License :: OSI Approved :: Apache Software License", "Topic :: Scientific/Engineering :: Artificial Intelligence", ], - keywords="benchmaek, transformers, quantization, pruning, optimization, training, inference, onnx, onnx runtime, intel, " + keywords="benchmark, transformers, quantization, pruning, optimization, training, inference, onnx, onnx runtime, intel, " "habana, graphcore, neural compressor, ipex, ipu, hpu, llm-swarm, py-txi, vllm, llama-cpp, auto-gptq, autoawq, " "sentence-transformers, bitsandbytes, codecarbon, flash-attn, deepspeed, diffusers, timm, peft", long_description=open("README.md", "r", encoding="utf-8").read(),