diff --git a/benchmarks/benchmark/tools/profile-generator/container/benchmark_serving.py b/benchmarks/benchmark/tools/profile-generator/container/benchmark_serving.py
index 37ecdb570..fb18fa6cc 100644
--- a/benchmarks/benchmark/tools/profile-generator/container/benchmark_serving.py
+++ b/benchmarks/benchmark/tools/profile-generator/container/benchmark_serving.py
@@ -13,7 +13,7 @@ import requests
 import time
 from typing import AsyncGenerator, List, Optional, Tuple, Dict
 
-from prometheus_client import start_http_server, Histogram
+from prometheus_client import start_http_server, Histogram, Gauge
 
 import google.auth
 import google.auth.transport.requests
@@ -34,6 +34,18 @@ prompt_length_metric = Histogram("LatencyProfileGenerator:prompt_length", "Input prompt length", buckets=[2**i for i in range(1, 16)])
 response_length_metric = Histogram("LatencyProfileGenerator:response_length", "Response length", buckets=[2**i for i in range(1, 16)])
 tpot_metric = Histogram('LatencyProfileGenerator:time_per_output_token', 'Time per output token per request')
+active_requests_metric = Gauge('LatencyProfileGenerator:active_requests', 'How many requests actively being processed')
+
+# Add trace config for monitoring in flight requests
+async def on_request_start(session, trace_config_ctx, params):
+  active_requests_metric.inc()
+
+async def on_request_end(session, trace_config_ctx, params):
+  active_requests_metric.dec()
+
+trace_config = aiohttp.TraceConfig()
+trace_config.on_request_start.append(on_request_start)
+trace_config.on_request_end.append(on_request_end)
 
 
 def sample_requests(
     dataset_path: str,
@@ -208,7 +220,7 @@ async def send_request(
 
   # Set client timeout to be 3 hrs.
   timeout = aiohttp.ClientTimeout(total=CLIENT_TIMEOUT_SEC)
-  async with aiohttp.ClientSession(timeout=timeout,trust_env=True) as session:
+  async with aiohttp.ClientSession(timeout=timeout,trust_env=True,trace_configs=[trace_config]) as session:
     while True:
       try:
         async with session.post(api_url, headers=headers, json=pload, ssl=False) as response:
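
For reviewers who want to try the pattern in isolation: below is a minimal, self-contained sketch of what this diff wires up — an `aiohttp.TraceConfig` whose start/end hooks increment and decrement a `prometheus_client` Gauge, so the gauge tracks the number of in-flight requests at any moment. The metric name, port, and target URL are illustrative placeholders, not values from the benchmark itself.

```python
import asyncio

import aiohttp
from prometheus_client import Gauge, start_http_server

# Placeholder metric name; the benchmark uses
# 'LatencyProfileGenerator:active_requests'.
in_flight = Gauge('demo_active_requests', 'Requests currently in flight')

async def on_request_start(session, trace_config_ctx, params):
  in_flight.inc()  # fires when aiohttp begins sending a request

async def on_request_end(session, trace_config_ctx, params):
  in_flight.dec()  # fires once the response has been fully received

trace_config = aiohttp.TraceConfig()
trace_config.on_request_start.append(on_request_start)
trace_config.on_request_end.append(on_request_end)

async def main():
  # Expose /metrics on a placeholder port for Prometheus to scrape.
  start_http_server(8000)
  async with aiohttp.ClientSession(trace_configs=[trace_config]) as session:
    async with session.get('https://example.com') as response:
      await response.text()

asyncio.run(main())
```

One caveat worth flagging on the diff itself: `on_request_end` fires only when a request completes normally; aiohttp routes failures to `on_request_exception` instead, so if requests can error out, adding a matching decrement there would keep the gauge from drifting upward.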