Skip to content

Commit

Permalink
Build from source quantization packages (#239)
Browse files Browse the repository at this point in the history
Co-authored-by: IlyasMoutawwakil <moutawwakil.ilyas.tsi@gmail.com>
Co-authored-by: Ilyas Moutawwakil <57442720+IlyasMoutawwakil@users.noreply.github.com>
  • Loading branch information
3 people committed Sep 4, 2024
1 parent 6dfec54 commit 8e57994
Show file tree
Hide file tree
Showing 11 changed files with 179 additions and 34 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test_cli_cuda_pytorch_single_gpu.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,4 +49,4 @@ jobs:
--workdir /workspace
run: |
pip install -e .[testing,diffusers,timm,peft,bitsandbytes,autoawq,auto-gptq]
pytest -x -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed) and not (awq)"
pytest -x -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed)"
5 changes: 1 addition & 4 deletions .github/workflows/test_cli_rocm_pytorch_multi_gpu.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,5 @@ jobs:
pip install -e .[testing,diffusers,timm,deepspeed,peft,autoawq,auto-gptq] "deepspeed<0.15"
- name: Run tests
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
PUSH_REPO_ID: optimum-benchmark/rocm
run: |
pytest -x -s -k "cli and cuda and pytorch and (dp or ddp or device_map or deepspeed) and not (bnb or awq)"
pytest -x -s -k "cli and cuda and pytorch and (dp or ddp or device_map or deepspeed) and not bnb"
5 changes: 1 addition & 4 deletions .github/workflows/test_cli_rocm_pytorch_single_gpu.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,5 @@ jobs:
pip install -e .[testing,diffusers,timm,peft,autoawq,auto-gptq]
- name: Run tests
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
PUSH_REPO_ID: optimum-benchmark/rocm
run: |
pytest -x -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed) and not (bnb or awq or gptq)"
pytest -x -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed) and not bnb"
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,6 @@ experiments/
amdsmi/
amd-*

# Mac specific
external_repos/
.DS_Store
outputs/
outputs/
14 changes: 7 additions & 7 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,19 @@ install:
## Build docker

build_cpu_image:
docker build -t optimum-benchmark:latest-cpu docker/cpu
docker build -t optimum-benchmark:latest-cpu -f docker/cpu/Dockerfile .
docker build --build-arg IMAGE=optimum-benchmark:latest-cpu --build-arg USER_ID=$(USER_ID) --build-arg GROUP_ID=$(GROUP_ID) -t optimum-benchmark:latest-cpu docker/unroot

build_cuda_image:
docker build -t optimum-benchmark:latest-cuda docker/cuda
docker build -t optimum-benchmark:latest-cuda -f docker/cuda/Dockerfile .
docker build --build-arg IMAGE=optimum-benchmark:latest-cuda --build-arg USER_ID=$(USER_ID) --build-arg GROUP_ID=$(GROUP_ID) -t optimum-benchmark:latest-cuda docker/unroot

build_cuda_ort_image:
docker build -t optimum-benchmark:latest-cuda-ort docker/cuda-ort
docker build -t optimum-benchmark:latest-cuda-ort -f docker/cuda-ort/Dockerfile .
docker build --build-arg IMAGE=optimum-benchmark:latest-cuda-ort --build-arg USER_ID=$(USER_ID) --build-arg GROUP_ID=$(GROUP_ID) -t optimum-benchmark:latest-cuda-ort docker/unroot

build_rocm_image:
docker build -t optimum-benchmark:latest-rocm docker/rocm
docker build -t optimum-benchmark:latest-rocm -f docker/rocm/Dockerfile .
docker build --build-arg IMAGE=optimum-benchmark:latest-rocm --build-arg USER_ID=$(USER_ID) --build-arg GROUP_ID=$(GROUP_ID) -t optimum-benchmark:latest-rocm docker/unroot

# Run docker
Expand Down Expand Up @@ -111,7 +111,7 @@ install_cli_cuda_pytorch:
pip install -e .[testing,timm,diffusers,peft,autoawq,auto-gptq,bitsandbytes,deepspeed]

install_cli_rocm_pytorch:
pip install -e .[testing,timm,diffusers,peft,autoawq,auto-gptq,deepspeed]
pip install -e .[testing,timm,diffusers,peft,autoawq,auto-gptq] "deepspeed<0.15"

install_cli_cuda_torch_ort:
pip install -e .[testing,timm,diffusers,peft,torch-ort,deepspeed]
Expand Down Expand Up @@ -167,10 +167,10 @@ test_cli_cuda_torch_ort_single_gpu:
pytest -s -k "cli and cuda and torch-ort and not (dp or ddp or device_map or deepspeed) and not peft"

test_cli_rocm_pytorch_multi_gpu:
pytest -s -k "cli and rocm and pytorch and (dp or ddp or device_map or deepspeed) and not (bnb or awq)"
pytest -s -k "cli and cuda and pytorch and (dp or ddp or device_map or deepspeed) and not bnb"

test_cli_rocm_pytorch_single_gpu:
pytest -s -k "cli and rocm and pytorch and not (dp or ddp or device_map or deepspeed) and not (bnb or awq)"
pytest -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed) and not bnb"

test_cli_llama_cpp:
pytest -s -k "llama_cpp"
Expand Down
2 changes: 1 addition & 1 deletion docker/cuda-ort/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -39,5 +39,5 @@ else \
pip install --no-cache-dir torch==${TORCH_VERSION} torchvision torchaudio --index-url https://download.pytorch.org/whl/${TORCH_CUDA} ; \
fi

ENV TORCH_CUDA_ARCH_LIST="5.2 6.0 6.1 7.0 7.5 8.0 8.6+PTX"
ENV TORCH_CUDA_ARCH_LIST="5.0 6.0 7.0 7.5 8.0 8.6 9.0+PTX"
RUN pip install --no-cache-dir torch-ort onnxruntime-training && python -m torch_ort.configure
13 changes: 10 additions & 3 deletions docker/cuda/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,13 @@ ARG UBUNTU_VERSION=22.04
FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu${UBUNTU_VERSION}

# Install necessary packages
ENV DEBIAN_FRONTEND noninteractive
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get install -y --no-install-recommends \
sudo build-essential git bash-completion \
python3.10 python3-pip python3.10-dev && \
apt-get clean && rm -rf /var/lib/apt/lists/* && \
update-alternatives --install /usr/bin/python python /usr/bin/python3.10 1 && \
pip install --no-cache-dir --upgrade pip setuptools wheel
pip install --no-cache-dir --upgrade pip setuptools wheel requests

# Install PyTorch
ARG TORCH_CUDA=cu124
Expand All @@ -36,4 +36,11 @@ elif [ "${TORCH_VERSION}" = "nighly" ]; then \
pip install --no-cache-dir --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/${TORCH_CUDA} ; \
else \
pip install --no-cache-dir torch==${TORCH_VERSION} torchvision torchaudio --index-url https://download.pytorch.org/whl/${TORCH_CUDA} ; \
fi
fi

# Install quantization libraries from source
ENV CUDA_VERSION=12.4
ENV TORCH_CUDA_ARCH_LIST="6.0 7.0 7.5 8.0 8.6 9.0+PTX"

COPY scripts/install_quantization_libs.py /internal/install_quantization_libs.py
RUN python internal/install_quantization_libs.py --install-autogptq-from-source --install-autoawq-from-source
14 changes: 11 additions & 3 deletions docker/rocm/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

ARG ROCM_VERSION=6.1.2
ARG ROCM_VERSION=5.7.1
ARG UBUNTU_VERSION=22.04

FROM rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}
Expand All @@ -21,15 +21,16 @@ FROM rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}
ENV PATH="/opt/rocm/bin:${PATH}"
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get upgrade -y && apt-get install -y --no-install-recommends \
rocsparse-dev hipsparse-dev rocthrust-dev rocblas-dev hipblas-dev \
sudo build-essential git bash-completion \
python3.10 python3-pip python3.10-dev && \
apt-get clean && rm -rf /var/lib/apt/lists/* && \
update-alternatives --install /usr/bin/python python /usr/bin/python3.10 1 && \
pip install --no-cache-dir --upgrade pip setuptools wheel && \
pip install --no-cache-dir --upgrade pip setuptools wheel requests && \
cd /opt/rocm/share/amd_smi && pip install .

# Install PyTorch
ARG TORCH_ROCM=rocm6.1
ARG TORCH_ROCM=rocm5.7
ARG TORCH_VERSION=stable

RUN if [ "${TORCH_VERSION}" = "stable" ]; then \
Expand All @@ -39,3 +40,10 @@ elif [ "${TORCH_VERSION}" = "nightly" ]; then \
else \
pip install --no-cache-dir torch==${TORCH_VERSION} torchvision torchaudio --index-url https://download.pytorch.org/whl/${TORCH_ROCM} ; \
fi

# Install quantization libraries from source
ENV ROCM_VERSION=5.7
ENV PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx1030;gfx1100"

COPY scripts/install_quantization_libs.py /internal/install_quantization_libs.py
RUN python internal/install_quantization_libs.py --install-autogptq-from-source --install-autoawq-from-source
24 changes: 24 additions & 0 deletions optimum_benchmark/backends/pytorch/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,11 +286,35 @@ def create_no_weights_model(self) -> None:
def process_quantization_config(self) -> None:
if self.is_gptq_quantized:
self.logger.info("\t+ Processing GPTQ config")

try:
import exllamav2_kernels # noqa: F401
except ImportError:
raise ImportError(
"Tried to import `exllamav2_kernels` but failed. "
"This means that the AutoGPTQ package is either not installed or not compiled with the right torch version. "
"Please install it from source following the instructions at `https://github.com/AutoGPTQ/AutoGPTQ`"
"Or use `python scripts/install_quantization_libs.py --install-autogptq-from-source` in "
"`optimum-benchmark` repository at `https://github.com/huggingface/optimum-benchmark`."
)

self.quantization_config = GPTQConfig(
**dict(getattr(self.pretrained_config, "quantization_config", {}), **self.config.quantization_config)
)
elif self.is_awq_quantized:
self.logger.info("\t+ Processing AWQ config")

try:
import exlv2_ext # noqa: F401
except ImportError:
raise ImportError(
"Tried to import `exlv2_ext` but failed. "
"This means that the AutoAWQ package is either not installed or not compiled with the right torch version. "
"Please install it from source following the instructions at `https://github.com/casper-hansen/AutoAWQ`"
"Or use `python scripts/install_quantization_libs.py --install-autoawq-from-source` in "
"`optimum-benchmark` repository at `https://github.com/huggingface/optimum-benchmark`."
)

self.quantization_config = AwqConfig(
**dict(getattr(self.pretrained_config, "quantization_config", {}), **self.config.quantization_config)
)
Expand Down
118 changes: 118 additions & 0 deletions scripts/install_quantization_libs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
import argparse
import os
import re
import subprocess
import sys

# Directory (relative to the current working directory) into which the upstream repositories are cloned.
EXTERNAL_REPOS_DIR = "external_repos"


def process_setup_file(setup_file_path):
    """Rewrite a ``setup.py`` file in place.

    Two textual substitutions are applied to the file's content:

    * every double-quoted ``"torch==<version>",`` entry (including its trailing comma) is deleted;
    * the statement that derives ``IS_CPU_ONLY`` from torch's MPS/CUDA availability is replaced
      by the constant assignment ``IS_CPU_ONLY = False``.

    Args:
        setup_file_path: Path of the ``setup.py`` file to patch.
    """
    cpu_only_detection = (
        "IS_CPU_ONLY = not torch.backends.mps.is_available() and not torch.cuda.is_available()"
    )

    with open(setup_file_path, "r") as handle:
        original_text = handle.read()

    # Only the quoted requirement token is removed, not the whole line it sits on.
    without_torch_pin = re.sub(r'"torch==[^\"]+",', "", original_text)
    patched_text = without_torch_pin.replace(cpu_only_detection, "IS_CPU_ONLY = False")

    with open(setup_file_path, "w") as handle:
        handle.write(patched_text)


def clone_or_pull_repo(repo_url, repo_location_path):
    """Clone ``repo_url`` into ``repo_location_path``, or ``git pull`` if that path already exists.

    Args:
        repo_url: URL of the git repository to clone.
        repo_location_path: Local directory that holds (or will hold) the checkout.

    Raises:
        subprocess.CalledProcessError: If the ``git`` command exits with a non-zero status.
    """
    if os.path.exists(repo_location_path):
        print(f"Directory {repo_location_path} already exists. Pulling the latest changes.")
        # Run git inside the checkout via ``cwd`` rather than a shell ``cd ... &&`` string, so paths
        # containing spaces or shell metacharacters are passed through verbatim.
        subprocess.run(["git", "pull"], cwd=repo_location_path, check=True)
    else:
        # normpath drops a trailing separator; basename then works with the platform's separator.
        repo_name = os.path.basename(os.path.normpath(repo_location_path))
        print(f"Cloning {repo_name} into {repo_location_path}")
        # An argv list (no shell) keeps the URL and destination as single, unparsed arguments.
        subprocess.run(["git", "clone", repo_url, repo_location_path], check=True)


def install_autoawq_from_source():
    """Install the AutoAWQ_kernels and AutoAWQ packages from their GitHub sources.

    For each repository, in this order (kernels first, then AutoAWQ): clone or update the
    checkout under ``EXTERNAL_REPOS_DIR``, patch its ``setup.py`` with ``process_setup_file``,
    and run ``pip install .`` inside the checkout with the current interpreter.

    Raises:
        subprocess.CalledProcessError: If a ``git`` or ``pip`` command exits with a non-zero status.
    """
    print("Installing AutoAWQ and AutoAWQ_kernels packages.")

    for repo_name in ("AutoAWQ_kernels", "AutoAWQ"):
        repo_path = os.path.join(EXTERNAL_REPOS_DIR, repo_name)

        clone_or_pull_repo(f"https://github.com/casper-hansen/{repo_name}", repo_path)
        process_setup_file(os.path.join(repo_path, "setup.py"))

        # argv list + ``cwd`` instead of a ``cd ... && ...`` shell string: neither the checkout path
        # nor the interpreter path is re-parsed by a shell. The child inherits the current
        # environment by default.
        subprocess.run([sys.executable, "-m", "pip", "install", "."], cwd=repo_path, check=True)

    print("AutoAWQ and AutoAWQ_kernels packages installed.")


def install_autogptq_from_source():
    """Install the AutoGPTQ package from its GitHub source.

    Clones or updates the checkout under ``EXTERNAL_REPOS_DIR``, installs ``numpy``, ``gekko``
    and ``pandas``, patches the checkout's ``setup.py`` with ``process_setup_file``, and runs
    ``pip install .`` inside the checkout.

    Raises:
        subprocess.CalledProcessError: If a ``git`` or ``pip`` command exits with a non-zero status.
    """
    print("Installing AutoGPTQ package.")
    autogptq_repo_path = os.path.join(EXTERNAL_REPOS_DIR, "AutoGPTQ")

    clone_or_pull_repo("https://github.com/PanQiWei/AutoGPTQ.git", autogptq_repo_path)

    # Use the running interpreter's pip for the prerequisites as well, so they land in the same
    # environment as the package installed below (a bare ``pip`` may belong to another Python).
    subprocess.run([sys.executable, "-m", "pip", "install", "numpy", "gekko", "pandas"], check=True)

    process_setup_file(os.path.join(autogptq_repo_path, "setup.py"))

    # argv list + ``cwd`` instead of a ``cd ... && ...`` shell string, so paths are never
    # re-parsed by a shell. The child inherits the current environment by default.
    subprocess.run([sys.executable, "-m", "pip", "install", "."], cwd=autogptq_repo_path, check=True)

    print("AutoGPTQ package installed.")


def main():
    """Command-line entry point: install the quantization packages selected by flags.

    ``--install-autoawq-from-source`` and ``--install-autogptq-from-source`` may be given
    separately or together. When neither is given, a hint is printed and the process exits
    with status 1.
    """
    parser = argparse.ArgumentParser(description="Install AutoAWQ or AutoGPTQ from source.")
    flag_help = {
        "--install-autoawq-from-source": "Install AutoAWQ and AutoAWQ_kernels packages from source.",
        "--install-autogptq-from-source": "Install AutoGPTQ package from source.",
    }
    for flag, help_text in flag_help.items():
        parser.add_argument(flag, action="store_true", help=help_text)

    options = parser.parse_args()
    want_awq = options.install_autoawq_from_source
    want_gptq = options.install_autogptq_from_source

    # Guard clause: nothing was requested.
    if not (want_awq or want_gptq):
        print(
            "Please specify an installation option. Use --install-autoawq-from-source or --install-autogptq-from-source."
        )
        sys.exit(1)

    if want_awq:
        install_autoawq_from_source()
    if want_gptq:
        install_autogptq_from_source()


if __name__ == "__main__":
    main()
12 changes: 3 additions & 9 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,6 @@
"Please install amdsmi from https://github.com/ROCm/amdsmi to enable this feature."
)

if USE_ROCM:
AUTOAWQ = "autoawq@https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.1/autoawq-0.2.1+rocm571-cp310-cp310-linux_x86_64.whl"
AUTOGPTQ = "auto-gptq@https://huggingface.github.io/autogptq-index/whl/rocm573/auto-gptq/auto_gptq-0.7.1%2Brocm5.7.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl"
else:
AUTOAWQ = "autoawq==0.2.1"
AUTOGPTQ = "auto-gptq==0.7.1"

EXTRAS_REQUIRE = {
"quality": ["ruff"],
Expand All @@ -81,8 +75,8 @@
"py-txi": ["py-txi"],
"vllm": ["vllm"],
# optional dependencies
"autoawq": [AUTOAWQ],
"auto-gptq": ["optimum", AUTOGPTQ],
"autoawq": ["autoawq"],
"auto-gptq": ["optimum", "auto-gptq"],
"sentence-transformers": ["sentence-transformers"],
"bitsandbytes": ["bitsandbytes"],
"codecarbon": ["codecarbon"],
Expand Down Expand Up @@ -114,7 +108,7 @@
"License :: OSI Approved :: Apache Software License",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
],
keywords="benchmaek, transformers, quantization, pruning, optimization, training, inference, onnx, onnx runtime, intel, "
keywords="benchmark, transformers, quantization, pruning, optimization, training, inference, onnx, onnx runtime, intel, "
"habana, graphcore, neural compressor, ipex, ipu, hpu, llm-swarm, py-txi, vllm, llama-cpp, auto-gptq, autoawq, "
"sentence-transformers, bitsandbytes, codecarbon, flash-attn, deepspeed, diffusers, timm, peft",
long_description=open("README.md", "r", encoding="utf-8").read(),
Expand Down

0 comments on commit 8e57994

Please sign in to comment.