diff --git a/benchmarks/benchmark/tools/profile-generator/container/latency_throughput_curve.sh b/benchmarks/benchmark/tools/profile-generator/container/latency_throughput_curve.sh index 97ba646d3..53e9da2d0 100644 --- a/benchmarks/benchmark/tools/profile-generator/container/latency_throughput_curve.sh +++ b/benchmarks/benchmark/tools/profile-generator/container/latency_throughput_curve.sh @@ -29,7 +29,14 @@ for request_rate in $(echo $REQUEST_RATES | tr ',' ' '); do # TODO: Check if profile already exists, if so then skip timestamp=$(date +"%Y-%m-%d_%H-%M-%S") output_file="latency-profile-${timestamp}.txt" - PYTHON_OPTS="$PYTHON_OPTS --save-json-results --host=$IP --port=$PORT --model=$TOKENIZER --dataset=$PROMPT_DATASET_FILE --tokenizer=$TOKENIZER --request-rate=$request_rate --backend=$BACKEND --num-prompts=$((request_rate * $BENCHMARK_TIME_SECONDS)) --max-input-length=$INPUT_LENGTH --max-output-length=$OUTPUT_LENGTH" + if [ ${request_rate} == 0 ]; then + request_rate="inf" + NUM_PROMPTS=$MAX_NUM_PROMPTS + else + NUM_PROMPTS=$((${request_rate} * $BENCHMARK_TIME_SECONDS)) + fi + + PYTHON_OPTS="$PYTHON_OPTS --save-json-results --host=$IP --port=$PORT --model=$TOKENIZER --dataset=$PROMPT_DATASET_FILE --tokenizer=$TOKENIZER --request-rate=$request_rate --backend=$BACKEND --num-prompts=$NUM_PROMPTS --max-input-length=$INPUT_LENGTH --max-output-length=$OUTPUT_LENGTH" if [[ "$SCRAPE_SERVER_METRICS" = "true" ]]; then PYTHON_OPTS="$PYTHON_OPTS --scrape-server-metrics" fi diff --git a/benchmarks/benchmark/tools/profile-generator/modules/latency-profile/manifest-templates/latency-profile-generator.yaml.tpl b/benchmarks/benchmark/tools/profile-generator/modules/latency-profile/manifest-templates/latency-profile-generator.yaml.tpl index 8dafa56d1..520d83379 100644 --- a/benchmarks/benchmark/tools/profile-generator/modules/latency-profile/manifest-templates/latency-profile-generator.yaml.tpl +++ b/benchmarks/benchmark/tools/profile-generator/modules/latency-profile/manifest-templates/latency-profile-generator.yaml.tpl @@ -42,6 +42,8 @@ spec: value: ${output_bucket} - name: SCRAPE_SERVER_METRICS value: ${scrape_server_metrics} + - name: MAX_NUM_PROMPTS + value: ${max_num_prompts} %{ for hugging_face_token_secret in hugging_face_token_secret_list ~} - name: HF_TOKEN valueFrom: