From 258706f96a97339e831f6ddcd351b544c70d1336 Mon Sep 17 00:00:00 2001 From: Anindyadeep Date: Wed, 24 Apr 2024 14:31:43 +0530 Subject: [PATCH] Added support for float32 --- bench_tensorrtllm/bench.sh | 1 - bench_tensorrtllm/setup.sh | 26 +++++++++++++++++++------- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/bench_tensorrtllm/bench.sh b/bench_tensorrtllm/bench.sh index c5cd405..ecdac5d 100755 --- a/bench_tensorrtllm/bench.sh +++ b/bench_tensorrtllm/bench.sh @@ -57,7 +57,6 @@ check_platform() { fi } - setup() { local MODEL_NAME="${1:-llama}" echo -e "\nSetting up with $SCRIPT_DIR/setup.sh..." diff --git a/bench_tensorrtllm/setup.sh b/bench_tensorrtllm/setup.sh index fe11020..f3cdc8d 100755 --- a/bench_tensorrtllm/setup.sh +++ b/bench_tensorrtllm/setup.sh @@ -88,9 +88,12 @@ build_engine () { --dtype float16 elif [ "$PRECISION" = "float32" ]; then - echo "Float32 is not currently support" - echo "checkout issue: https://github.com/NVIDIA/TensorRT-LLM/issues/1485" - exit 1 + docker run --gpus all --ipc=host --ulimit memlock=-1 --ulimit stack=67108864 \ + -v "$CURRENT_DIR/models":/mnt/models \ + tensorrt_llm/release:latest \ + python3 "$CONVERT_CHECKPOINT_PATH" --model_dir "$HF_MODEL_DIR" \ + --output_dir "$ENGINE_DIR" \ + --dtype float32 elif [ "$PRECISION" = "int8" ]; then docker run --gpus all --ipc=host --ulimit memlock=-1 --ulimit stack=67108864 \ @@ -120,10 +123,18 @@ build_engine () { # Now build the engine echo "Finally converting to .engine format" - docker run --gpus all --ipc=host --ulimit memlock=-1 --ulimit stack=67108864 \ - -v "$CURRENT_DIR/models":/mnt/models \ - tensorrt_llm/release:latest \ - trtllm-build --checkpoint_dir "$ENGINE_DIR" --output_dir "$ENGINE_DIR" --gemm_plugin float16 + if [ "$PRECISION" = "float16" ] || [ "$PRECISION" = "int4" ] || [ "$PRECISION" = "int8" ]; then + docker run --gpus all --ipc=host --ulimit memlock=-1 --ulimit stack=67108864 \ + -v "$CURRENT_DIR/models":/mnt/models \ + tensorrt_llm/release:latest \ + trtllm-build --checkpoint_dir "$ENGINE_DIR" --output_dir "$ENGINE_DIR" --gemm_plugin float16 + else + docker run --gpus all --ipc=host --ulimit memlock=-1 --ulimit stack=67108864 \ + -v "$CURRENT_DIR/models":/mnt/models \ + tensorrt_llm/release:latest \ + trtllm-build --checkpoint_dir "$ENGINE_DIR" --output_dir "$ENGINE_DIR" --gemm_plugin float32 --strongly_typed --gpt_attention_plugin float32 + fi + else echo "Engine file already exists" fi @@ -135,6 +146,7 @@ build_and_compile_all_engines () { build_engine "$MODEL_NAME" "float16" build_engine "$MODEL_NAME" "int8" build_engine "$MODEL_NAME" "int4" + build_engine "$MODEL_NAME" "float32" else echo "Docker image does not exist, please build the docker image first ..." fi