Distributed trt-llm (#275)
IlyasMoutawwakil authored Sep 23, 2024
1 parent 496d745 commit ff8375e
Showing 13 changed files with 102 additions and 27 deletions.
8 changes: 6 additions & 2 deletions .github/workflows/test_cli_cuda_pytorch.yaml
@@ -74,6 +74,10 @@ jobs:
run: |
pip install -e .[testing,diffusers,timm,peft,deepspeed]
- name: Run tests
- name: Run tests (parallel)
run: |
pytest tests/test_cli.py -x -s -k "cli and cuda and pytorch and (dp or ddp or device_map)"
- name: Run tests (sequential)
run: |
FORCE_SERIAL=1 pytest tests/test_cli.py -x -s -k "cli and cuda and pytorch and (dp or ddp or device_map or deepspeed)"
FORCE_SEQUENTIAL=1 pytest tests/test_cli.py -x -s -k "cli and cuda and pytorch and (deepspeed_inference)"
34 changes: 32 additions & 2 deletions .github/workflows/test_cli_cuda_tensorrt_llm.yaml
@@ -20,13 +20,14 @@ concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}

jobs:
cli_cuda_tensorrt_llm_tests:
cli_cuda_tensorrt_llm_single_gpu_tests:
if: ${{
(github.event_name == 'push') ||
(github.event_name == 'workflow_dispatch') ||
contains( github.event.pull_request.labels.*.name, 'cli') ||
contains( github.event.pull_request.labels.*.name, 'cuda') ||
contains( github.event.pull_request.labels.*.name, 'tensorrt_llm') ||
contains( github.event.pull_request.labels.*.name, 'single_gpu') ||
contains( github.event.pull_request.labels.*.name, 'cli_cuda_tensorrt_llm')
}}

@@ -46,4 +47,33 @@ jobs:
- name: Run tests
run: |
pytest tests/test_cli.py -x -s -k "cli and cuda and tensorrt_llm"
pytest tests/test_cli.py -x -s -k "cli and cuda and tensorrt_llm and not (tp or pp)"
cli_cuda_tensorrt_llm_multi_gpu_tests:
if: ${{
(github.event_name == 'push') ||
(github.event_name == 'workflow_dispatch') ||
contains( github.event.pull_request.labels.*.name, 'cli') ||
contains( github.event.pull_request.labels.*.name, 'cuda') ||
contains( github.event.pull_request.labels.*.name, 'tensorrt_llm') ||
contains( github.event.pull_request.labels.*.name, 'multi_gpu') ||
contains( github.event.pull_request.labels.*.name, 'cli_cuda_tensorrt_llm_multi_gpu')
}}

runs-on: [multi-gpu, nvidia-gpu, 4-a10, ci]

container:
image: huggingface/optimum-nvidia:latest
options: --ipc host --gpus all

steps:
- name: Checkout
uses: actions/checkout@v4

- name: Install dependencies
run: |
pip install -e .[testing]
- name: Run tests (sequential)
run: |
FORCE_SEQUENTIAL=1 pytest tests/test_cli.py -x -s -k "cli and cuda and tensorrt_llm and (tp or pp)"
2 changes: 1 addition & 1 deletion .github/workflows/test_cli_cuda_torch_ort.yaml
@@ -78,4 +78,4 @@ jobs:
- name: Run tests
run: |
FORCE_SERIAL=1 pytest tests/test_cli.py -x -s -k "cli and cuda and torch_ort and (dp or ddp or device_map) and not (peft)"
pytest tests/test_cli.py -x -s -k "cli and cuda and torch_ort and (dp or ddp or device_map)"
8 changes: 4 additions & 4 deletions .github/workflows/test_cli_cuda_vllm.yaml
@@ -45,9 +45,9 @@ jobs:
run: |
pip install -e .[testing]
- name: Run tests
- name: Run tests (sequential)
run: |
FORCE_SERIAL=1 pytest tests/test_cli.py -x -s -k "cli and cuda and vllm and not (tp or pp)"
FORCE_SEQUENTIAL=1 pytest tests/test_cli.py -x -s -k "cli and cuda and vllm and not (tp or pp)"
run_cli_cuda_vllm_multi_gpu_tests:
if: ${{
@@ -74,6 +74,6 @@ jobs:
run: |
pip install -e .[testing]
- name: Run tests
- name: Run tests (sequential)
run: |
FORCE_SERIAL=1 pytest tests/test_cli.py -x -s -k "cli and cuda and vllm and (tp or pp)"
FORCE_SEQUENTIAL=1 pytest tests/test_cli.py -x -s -k "cli and cuda and vllm and (tp or pp)"
3 changes: 2 additions & 1 deletion .github/workflows/test_cli_misc.yaml
@@ -61,4 +61,5 @@ jobs:
pip install -e .[testing]
- name: Run tests
run: pytest tests/test_cli.py -s -k "cli and not (cpu or cuda or rocm or mps)"
run: |
pytest tests/test_cli.py -s -k "cli and not (cpu or cuda or rocm or mps)"
8 changes: 6 additions & 2 deletions .github/workflows/test_cli_rocm_pytorch.yaml
@@ -82,6 +82,10 @@ jobs:
run: |
pip install -e .[testing,diffusers,timm,deepspeed,peft,autoawq,auto-gptq] "deepspeed<0.15"
- name: Run tests
- name: Run tests (parallel)
run: |
pytest tests/test_cli.py -x -s -k "cli and cuda and pytorch and (dp or ddp or device_map)"
- name: Run tests (sequential)
run: |
FORCE_SERIAL=1 pytest tests/test_cli.py -x -s -k "cli and cuda and pytorch and (dp or ddp or device_map or deepspeed) and not bnb"
FORCE_SEQUENTIAL=1 pytest tests/test_cli.py -x -s -k "cli and cuda and pytorch and (deepspeed_inference)"
5 changes: 5 additions & 0 deletions optimum_benchmark/launchers/torchrun/launcher.py
@@ -1,4 +1,5 @@
import os
import sys
import traceback
from contextlib import ExitStack
from logging import Logger
@@ -156,6 +157,10 @@ def entrypoint(worker: Callable[..., BenchmarkReport], worker_args: List[Any], l
else:
setup_logging(level="ERROR", to_file=log_to_file, prefix=f"RANK-PROCESS-{rank}")

if sys.platform == "win32":
logger.info("\t+ Disabline libuv on Windows")
os.environ["USE_LIBUV"] = "0"

if torch.cuda.is_available():
logger.info(f"\t+ Setting torch.distributed cuda device to {rank}")
device = torch.device("cuda", rank)
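Note on the libuv change above: recent PyTorch builds (2.4+) default to a libuv-based TCPStore for torch.distributed rendezvous, and libuv is not supported on Windows, hence USE_LIBUV=0. A minimal single-process sketch (hypothetical, not part of this commit) showing that the variable must be exported before the process group is created:

import os

# USE_LIBUV is read when the TCPStore is constructed, so it must be set
# before init_process_group is called (as the launcher does above).
os.environ["USE_LIBUV"] = "0"
os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
os.environ.setdefault("MASTER_PORT", "29500")
os.environ.setdefault("RANK", "0")
os.environ.setdefault("WORLD_SIZE", "1")

import torch.distributed as dist

dist.init_process_group(backend="gloo")  # gloo is the backend that works on Windows
dist.destroy_process_group()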
2 changes: 0 additions & 2 deletions tests/configs/_deepspeed_inference_.yaml
@@ -17,8 +17,6 @@ scenario:
batch_size: 2

hydra:
launcher:
n_jobs: 1
job:
env_set:
LOG_ALL_RANKS: 1
6 changes: 6 additions & 0 deletions tests/configs/_tensorrt_llm_pp_.yaml
@@ -0,0 +1,6 @@
backend:
model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
gpus_per_node: 2
device_ids: 0,1
world_size: 2
pp: 2
6 changes: 6 additions & 0 deletions tests/configs/_tensorrt_llm_tp_.yaml
@@ -0,0 +1,6 @@
backend:
model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
gpus_per_node: 2
device_ids: 0,1
world_size: 2
tp: 2
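The two new test configs above exercise TensorRT-LLM's two parallelism axes on the same 2-GPU topology: _tensorrt_llm_tp_ splits each layer across GPUs (tensor parallelism), while _tensorrt_llm_pp_ splits the layer stack (pipeline parallelism). A small sanity-check sketch, assuming the usual TRT-LLM convention that world_size == tp * pp (the check_parallelism helper is hypothetical, not from this commit):

def check_parallelism(world_size: int, tp: int = 1, pp: int = 1) -> None:
    # Usual convention: every rank hosts exactly one (tp, pp) shard.
    if tp * pp != world_size:
        raise ValueError(f"world_size ({world_size}) != tp * pp ({tp * pp})")

check_parallelism(world_size=2, tp=2)  # matches _tensorrt_llm_tp_.yaml
check_parallelism(world_size=2, pp=2)  # matches _tensorrt_llm_pp_.yaml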
10 changes: 10 additions & 0 deletions tests/configs/cuda_inference_tensorrt_llm_pp.yaml
@@ -0,0 +1,10 @@
defaults:
# order of inheritance, last one overrides previous ones
- _base_ # inherits from base config
- _cuda_ # inherits from cuda config
- _inference_ # inherits from inference config
- _tensorrt_llm_pp_ # inherits from tensorrt_llm_pp config
- _self_ # hydra 1.1 compatibility
- override backend: tensorrt-llm

name: cuda_inference_tensorrt_llm_pp
10 changes: 10 additions & 0 deletions tests/configs/cuda_inference_tensorrt_llm_tp.yaml
@@ -0,0 +1,10 @@
defaults:
# order of inheritance, last one overrides previous ones
- _base_ # inherits from base config
- _cuda_ # inherits from cuda config
- _inference_ # inherits from inference config
- _tensorrt_llm_tp_ # inherits from tensorrt_llm_tp config
- _self_ # hydra 1.1 compatibility
- override backend: tensorrt-llm

name: cuda_inference_tensorrt_llm_tp
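Outside of CI, either of the new configs can be launched the same way test_cli.py does it; a sketch, assuming optimum-benchmark is installed and the command is run from the repository root:

import subprocess

# Mirrors the argument list built in tests/test_cli.py below.
args = [
    "optimum-benchmark",
    "--config-dir", "tests/configs",
    "--config-name", "cuda_inference_tensorrt_llm_tp",
]
subprocess.run(args, check=True)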
27 changes: 14 additions & 13 deletions tests/test_cli.py
@@ -10,7 +10,8 @@
LOGGER = getLogger("test-cli")


FORCE_SERIAL = os.environ.get("FORCE_SERIAL", "0") == "1"
FORCE_SEQUENTIAL = os.environ.get("FORCE_SEQUENTIAL", "0") == "1"

TEST_CONFIG_DIR = Path(__file__).parent / "configs"
TEST_CONFIG_NAMES = [
config.split(".")[0]
@@ -30,16 +31,16 @@ def test_cli_configs(config_name):
TEST_CONFIG_DIR,
"--config-name",
config_name,
# to run the tests faster
"hydra/launcher=joblib",
"hydra.launcher.batch_size=1",
"hydra.launcher.prefer=threads",
]

if FORCE_SERIAL:
args += ["hydra.launcher.n_jobs=1"]
else:
args += ["hydra.launcher.n_jobs=-1"]
if not FORCE_SEQUENTIAL:
args += [
# to run the tests faster
"hydra/launcher=joblib",
"hydra.launcher.n_jobs=-1",
"hydra.launcher.batch_size=1",
"hydra.launcher.prefer=threads",
]

if ROCR_VISIBLE_DEVICES is not None:
args += [f'backend.device_ids="{ROCR_VISIBLE_DEVICES}"']
@@ -50,7 +51,7 @@ def test_cli_configs(config_name):
assert popen.returncode == 0, f"Failed to run {config_name}"


@pytest.mark.parametrize("launcher", ["inline", "process"])
@pytest.mark.parametrize("launcher", ["inline", "process", "torchrun"])
def test_cli_exit_code_0(launcher):
args_0 = [
"optimum-benchmark",
@@ -59,7 +60,7 @@ def test_cli_exit_code_0(launcher):
"--config-name",
"_base_",
"name=test",
f"launcher={launcher}",
"launcher=" + launcher,
# compatible task and model
"backend.task=text-classification",
"backend.model=bert-base-uncased",
@@ -79,7 +80,7 @@ def test_cli_exit_code_1(launcher):
"--config-name",
"_base_",
"name=test",
f"launcher={launcher}",
"launcher=" + launcher,
# incompatible task and model to trigger an error
"backend.task=image-classification",
"backend.model=bert-base-uncased",
@@ -102,7 +103,7 @@ def test_cli_numactl(launcher):
"--config-name",
"_base_",
"name=test",
f"launcher={launcher}",
"launcher=" + launcher,
"launcher.numactl=True",
"backend.task=text-classification",
"backend.model=bert-base-uncased",
