Skip to content

Commit

Permalink
Added support for float32
Browse files: browse the repository at this point in the history
  • Loading branch information
Anindyadeep committed Apr 24, 2024
1 parent ed1cc4c commit 258706f
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 8 deletions.
1 change: 0 additions & 1 deletion bench_tensorrtllm/bench.sh
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@ check_platform() {
fi
}


setup() {
local MODEL_NAME="${1:-llama}"
echo -e "\nSetting up with $SCRIPT_DIR/setup.sh..."
Expand Down
26 changes: 19 additions & 7 deletions bench_tensorrtllm/setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,12 @@ build_engine () {
--dtype float16

elif [ "$PRECISION" = "float32" ]; then
echo "Float32 is not currently support"
echo "checkout issue: https://github.com/NVIDIA/TensorRT-LLM/issues/1485"
exit 1
docker run --gpus all --ipc=host --ulimit memlock=-1 --ulimit stack=67108864 \
-v "$CURRENT_DIR/models":/mnt/models \
tensorrt_llm/release:latest \
python3 "$CONVERT_CHECKPOINT_PATH" --model_dir "$HF_MODEL_DIR" \
--output_dir "$ENGINE_DIR" \
--dtype float32

elif [ "$PRECISION" = "int8" ]; then
docker run --gpus all --ipc=host --ulimit memlock=-1 --ulimit stack=67108864 \
Expand Down Expand Up @@ -120,10 +123,18 @@ build_engine () {
# Now build the engine
echo "Finally converting to .engine format"

docker run --gpus all --ipc=host --ulimit memlock=-1 --ulimit stack=67108864 \
-v "$CURRENT_DIR/models":/mnt/models \
tensorrt_llm/release:latest \
trtllm-build --checkpoint_dir "$ENGINE_DIR" --output_dir "$ENGINE_DIR" --gemm_plugin float16
if [ "$PRECISION" = "float16" ] || [ "$PRECISION" = "int4" ] || [ "$PRECISION" = "int8" ]; then
docker run --gpus all --ipc=host --ulimit memlock=-1 --ulimit stack=67108864 \
-v "$CURRENT_DIR/models":/mnt/models \
tensorrt_llm/release:latest \
trtllm-build --checkpoint_dir "$ENGINE_DIR" --output_dir "$ENGINE_DIR" --gemm_plugin float16
else
docker run --gpus all --ipc=host --ulimit memlock=-1 --ulimit stack=67108864 \
-v "$CURRENT_DIR/models":/mnt/models \
tensorrt_llm/release:latest \
trtllm-build --checkpoint_dir "$ENGINE_DIR" --output_dir "$ENGINE_DIR" --gemm_plugin float32 --strongly_typed --gpt_attention_plugin float32
fi

else
echo "Engine file already exists"
fi
Expand All @@ -135,6 +146,7 @@ build_and_compile_all_engines () {
build_engine "$MODEL_NAME" "float16"
build_engine "$MODEL_NAME" "int8"
build_engine "$MODEL_NAME" "int4"
build_engine "$MODEL_NAME" "float32"
else
echo "Docker image does not exist, please build the docker image first ..."
fi
Expand Down

0 comments on commit 258706f

Please sign in to comment.