diff --git a/benchmarks/benchmark/tools/profile-generator/container/benchmark_serving.py b/benchmarks/benchmark/tools/profile-generator/container/benchmark_serving.py
index c49a694cb..c05f3b4e2 100644
--- a/benchmarks/benchmark/tools/profile-generator/container/benchmark_serving.py
+++ b/benchmarks/benchmark/tools/profile-generator/container/benchmark_serving.py
@@ -13,12 +13,13 @@ import requests
 import time
 from typing import AsyncGenerator, List, Optional, Tuple, Dict
 
-from prometheus_client import start_http_server, Histogram, Gauge
+from prometheus_client import start_http_server, Histogram
 
 import google.auth
 import google.auth.transport.requests
 
 import aiohttp
+from aiohttp_prometheus_exporter.trace import PrometheusTraceConfig
 import numpy as np
 from transformers import AutoTokenizer
 from transformers import PreTrainedTokenizerBase
@@ -34,7 +35,6 @@ prompt_length_metric = Histogram("LatencyProfileGenerator:prompt_length", "Input prompt length", buckets=[2**i for i in range(1, 16)])
 response_length_metric = Histogram("LatencyProfileGenerator:response_length", "Response length", buckets=[2**i for i in range(1, 16)])
 tpot_metric = Histogram('LatencyProfileGenerator:time_per_output_token', 'Time per output token per request')
-active_requests_metric = Gauge('LatencyProfileGenerator:active_requests', 'How many requests actively being processed')
 
 def sample_requests(
     dataset_path: str,
@@ -209,14 +209,11 @@ async def send_request(
 
   # Set client timeout to be 3 hrs.
   timeout = aiohttp.ClientTimeout(total=CLIENT_TIMEOUT_SEC)
-  async with aiohttp.ClientSession(timeout=timeout,trust_env=True) as session:
+  async with aiohttp.ClientSession(timeout=timeout,trust_env=True,trace_configs=[PrometheusTraceConfig()]) as session:
     while True:
       try:
-        active_requests_metric.inc()
         async with session.post(api_url, headers=headers, json=pload, ssl=False) as response:
-          output = await response.json()
-          active_requests_metric.dec()
 
         # Re-send the request if it failed.
         if "error" not in output:
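
Context for the change: `PrometheusTraceConfig` subscribes to aiohttp's client trace signals, so per-request metrics are recorded for every request made through the session, replacing the hand-maintained `active_requests` Gauge and its `inc()`/`dec()` calls in the request path. A minimal sketch of the pattern, not taken from this PR (the port and target URL below are placeholders):

```python
# Sketch only: demonstrates wiring aiohttp-prometheus-exporter's trace
# config into a client session. Port 8000 and example.com are placeholders.
import asyncio

import aiohttp
from aiohttp_prometheus_exporter.trace import PrometheusTraceConfig
from prometheus_client import start_http_server


async def main() -> None:
    # Expose the default prometheus_client registry over HTTP for scraping.
    start_http_server(8000)

    # The trace config hooks request start/end signals, so every request
    # made through this session is measured without manual bookkeeping.
    async with aiohttp.ClientSession(trace_configs=[PrometheusTraceConfig()]) as session:
        async with session.get("https://example.com") as response:
            await response.text()


asyncio.run(main())
```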