diff --git a/benchmarks/benchmark/tools/profile-generator/container/benchmark_serving.py b/benchmarks/benchmark/tools/profile-generator/container/benchmark_serving.py
index 8829e5a1e..668a1e4d1 100644
--- a/benchmarks/benchmark/tools/profile-generator/container/benchmark_serving.py
+++ b/benchmarks/benchmark/tools/profile-generator/container/benchmark_serving.py
@@ -225,7 +225,10 @@ async def send_stream_request(
   output_token_ids = tokenizer(output).input_ids
   output_len = len(output_token_ids)
   request_latency = (prompt_len, output_len, (request_end_time - request_start_time))
-  tpot_metric.observe((request_end_time - request_start_time) / output_len)
+  # Exclude first token for tpot calculation. Skip the sample when ttft was not
+  # captured or fewer than two tokens were produced (would raise or divide by zero).
+  if ttft is not None and output_len > 1:
+    tpot_metric.observe((request_end_time - ttft - request_start_time) / (output_len - 1))
   if ttft is not None:
     ttft_metric.observe(ttft)
   prompt_length_metric.observe(prompt_len)