diff --git a/torchbenchmark/util/experiment/instantiator.py b/torchbenchmark/util/experiment/instantiator.py
index d38f60c722..2dbd730ce0 100644
--- a/torchbenchmark/util/experiment/instantiator.py
+++ b/torchbenchmark/util/experiment/instantiator.py
@@ -24,6 +24,7 @@
 @dataclasses.dataclass
 class TorchBenchModelConfig:
+    model_set: str
     name: str
     test: str
     device: str
@@ -160,3 +161,19 @@ def list_extended_models(suite_name: str = "all") -> List[str]:
     assert (
         False
     ), f"Currently, we only support model set torchbench, huggingface or timm, but get {suite_name}."
+
+
+def get_model_set_from_model_name(model_name: str) -> str:
+    from torchbenchmark.util.framework.huggingface.extended_configs import (
+        list_extended_huggingface_models,
+    )
+    from torchbenchmark.util.framework.timm.extended_configs import (
+        list_extended_timm_models,
+    )
+    if model_name in list_extended_huggingface_models():
+        return "huggingface"
+    if model_name in list_extended_timm_models():
+        return "timm"
+    if model_name in list_models():
+        return "torchbench"
+    assert False, f"Model {model_name} is not found in any model set."
\ No newline at end of file
diff --git a/userbenchmark/group_bench/run_config.py b/userbenchmark/group_bench/run_config.py
index 669a5785a0..35253bd5aa 100644
--- a/userbenchmark/group_bench/run_config.py
+++ b/userbenchmark/group_bench/run_config.py
@@ -1,6 +1,7 @@
 import yaml
-from typing import Any, Dict, List, Optional
-from torchbenchmark.util.experiment.instantiator import TorchBenchModelConfig, list_extended_models
+import itertools
+from typing import Any, Dict, List, Optional, Tuple
+from torchbenchmark.util.experiment.instantiator import TorchBenchModelConfig, list_extended_models, get_model_set_from_model_name
 from torchbenchmark.util.experiment.metrics import run_config
 
 def _get_models(models: Optional[List[str]]=None, model_set: Optional[List[str]]=None) -> List[str]:
@@ -9,19 +10,78 @@ def _get_models(models: Optional[List[str]]=None, model_set: Optional[List[str]]
         result.add(list_extended_models(s))
     return list(result)
 
-def config_obj_to_model_configs(config: Dict[str, Any]) -> Dict[str, Dict[str, TorchBenchModelConfig]]:
-    models = _get_models(config.get("model"), config.get("model_set"))
-    tests = config["test"]
-    devices = config["device"]
+def config_obj_to_model_configs(config: Dict[str, Any]) -> Dict[Tuple[str, str, int, str], Dict[str, List[TorchBenchModelConfig]]]:
+    models = _get_models(models=config.get("model", None), model_set=config.get("model_set", None))
+    batch_sizes = config.get("batch_size", [None])
+    tests = config.get("test", ["train", "eval"])
+    devices = config.get("device", ["cuda"])
+    precisions = config.get("precision", [None])
     metrics = config["metrics"]
+    test_groups = config["test_groups"]
+    result = {}
+    for group_name in test_groups.keys():
+        group_extra_args = test_groups[group_name]
+        cfgs = itertools.product(devices, tests, batch_sizes, precisions, models)
+        for device, test, batch_size, precision, model_name in cfgs:
+            # copy the group's extra args per config so precision flags do not accumulate
+            extra_args = group_extra_args.copy()
+            if precision:
+                extra_args.extend(["--precision", precision])
+            if batch_size:
+                batch_size = int(batch_size)
+            common_key = (device, test, batch_size, precision)
+            if common_key not in result:
+                result[common_key] = {}
+            if group_name not in result[common_key]:
+                result[common_key][group_name] = []
+            result[common_key][group_name].append(
+                TorchBenchModelConfig(
+                    model_set=get_model_set_from_model_name(model_name),
+                    name=model_name,
+                    device=device,
+                    test=test,
+                    batch_size=batch_size,
+                    extra_args=extra_args,
+                    extra_env=None,
+                    metrics=metrics,
+                )
+            )
+    return result
+
+
+def _common_key_to_group_key(common_key: Tuple[str, str, int, str]) -> Dict[str, Any]:
+    device, test, batch_size, precision = common_key
+    return {
+        "device": device,
+        "test": test,
+        "batch_size": batch_size,
+        "precision": precision,
+    }
 
-def run_benchmark_group_config(group_config_file: str, dryrun: bool=False) -> Dict[str, Dict[str, Any]]:
+def _config_result_to_group_result(group_name: str, model_name: str, metrics: Dict[str, Any], required_metrics: List[str]) -> Dict[str, Any]:
+    # output metric format: <model_set>_<model_name>[<group_name>]_<metric>
+    model_set = get_model_set_from_model_name(model_name)
     result = {}
+    for metric in required_metrics:
+        metric_name = f"{model_set}_{model_name}[{group_name}]_{metric}"
+        result[metric_name] = metrics[metric]
+    return result
+
+
+def run_benchmark_group_config(group_config_file: str, dryrun: bool=False) -> List[Dict[str, Any]]:
+    result = []
     with open(group_config_file, "r") as fp:
         config_obj = yaml.safe_load(fp)
-    configs = config_obj_to_model_configs(config_obj)
-    for key in configs.keys():
-        benchmark_results = [(key, run_config(configs[key][x], as_dict=True, dryrun=dryrun)) for x in configs[key].keys()]
-        result[key] = dict(benchmark_results)
+    configs: Dict[Tuple[str, str, int, str], Dict[str, List[TorchBenchModelConfig]]] = config_obj_to_model_configs(config_obj)
+    for common_key in configs.keys():
+        group_key = _common_key_to_group_key(common_key)
+        group_result = {"group_key": group_key, "group_results": []}
+        for group_name in configs[common_key]:
+            # collect results from every test group under this common key
+            group_result["group_results"].extend(
+                _config_result_to_group_result(
+                    group_name=group_name,
+                    model_name=x.name,
+                    metrics=run_config(x, as_dict=True, dryrun=dryrun),
+                    required_metrics=x.metrics,
+                )
+                for x in configs[common_key][group_name]
+            )
+        result.append(group_result)
     return result
diff --git a/userbenchmark/torch-nightly/run.py b/userbenchmark/torch-nightly/run.py
index 2c8d6440b0..a108ee6f27 100644
--- a/userbenchmark/torch-nightly/run.py
+++ b/userbenchmark/torch-nightly/run.py
@@ -26,7 +26,7 @@ def run(args: List[str]):
     args = parse_args(args)
     assert os.path.exists(args.config), f"Expect an existing benchmark config file, get path: {args.config}."
     benchmark_result = get_output_json(BM_NAME, run_benchmark_config(config_file=args.config, dryrun=args.dryrun))
-    benchmark_result["environ"]["benchmark_style"] = "grouped"
+    benchmark_result["environ"]["benchmark_style"] = "group_bench"
     benchmark_result_json = json.dumps(benchmark_result, indent=4)
     with open(args.output, "w") as fp:
         fp.write(benchmark_result_json)
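
A minimal usage sketch of the reworked grouping in config_obj_to_model_configs, assuming a hypothetical group_bench config: the dict below stands in for what yaml.safe_load would return, and the model name, group names, metric, and extra arguments are illustrative assumptions rather than values from a real config file. Configurations are grouped by a (device, test, batch_size, precision) key and, within each key, by test group name.

    # All model/group names, metrics, and extra args below are hypothetical.
    from userbenchmark.group_bench.run_config import config_obj_to_model_configs

    config = {
        "model": ["BERT_pytorch"],
        "test": ["eval"],
        "device": ["cuda"],
        "precision": ["bf16"],
        "metrics": ["latencies"],
        "test_groups": {
            "eager": [],                                # group name -> extra CLI args
            "inductor": ["--torchdynamo", "inductor"],
        },
    }

    configs = config_obj_to_model_configs(config)
    # Keys are (device, test, batch_size, precision) tuples, e.g. ("cuda", "eval", None, "bf16");
    # each value maps a test group name to a list of TorchBenchModelConfig objects.
    for common_key, groups in configs.items():
        for group_name, model_configs in groups.items():
            print(common_key, group_name, [cfg.name for cfg in model_configs])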