From 49065c2635cd2164efe72de24a57de6929059c37 Mon Sep 17 00:00:00 2001
From: FindHao
Date: Thu, 2 Feb 2023 11:08:08 -0500
Subject: [PATCH 01/10] add CoverageMode

---
 run.py              |  14 +++-
 run_sweep.py        |   4 +
 scripts/coverage.py | 191 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 206 insertions(+), 3 deletions(-)
 create mode 100644 scripts/coverage.py

diff --git a/run.py b/run.py
index 1cbe76d308..4e07833a8f 100644
--- a/run.py
+++ b/run.py
@@ -15,7 +15,7 @@
 from torchbenchmark import load_model_by_name
 
 import torch
-
+from scripts.coverage import CoverageMode
 WARMUP_ROUNDS = 3
 SUPPORT_DEVICE_LIST = ["cpu", "cuda"]
 if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
@@ -142,14 +142,22 @@ def run_one_step(func, nwarmup=WARMUP_ROUNDS, num_iter=10, model=None, export_me
             # second according to https://docs.python.org/3/library/time.html#time.time.
             t0 = time.time_ns()
             start_event.record()
-            func()
+            with CoverageMode(args.model, '/tmp/api_used.csv') as coverage:
+                try:
+                    func()
+                finally:
+                    coverage.commit()
             end_event.record()
             torch.cuda.synchronize()
             t1 = time.time_ns()
             result_summary.append((start_event.elapsed_time(end_event), (t1 - t0) / 1_000_000))
        elif args.device == "mps":
            t0 = time.time_ns()
-           func()
+           with CoverageMode(args.model, '/tmp/api_used.csv') as coverage:
+               try:
+                   func()
+               finally:
+                   coverage.commit()
            t1 = time.time_ns()
            wall_latency = t1 - t0
            # TODO: modify this to add GPU time as well
diff --git a/run_sweep.py b/run_sweep.py
index e9ba079186..851bd8a623 100644
--- a/run_sweep.py
+++ b/run_sweep.py
@@ -141,6 +141,7 @@ def _run_model_test(model_path: pathlib.Path, test: str, device: str, jit: bool,
     parser.add_argument("--jit", action='store_true', help="Turn on torchscript.")
     parser.add_argument("-o", "--output", type=str, default="tb-output.json", help="The default output json file.")
     parser.add_argument("--proper-bs", action='store_true', help="Find the best batch_size for current devices.")
+    parser.add_argument("--coverage", action='store_true', help="Test API coverage.")
     args, extra_args = parser.parse_known_args()
     args.models = _list_model_paths(args.models)
     results = []
@@ -152,6 +153,9 @@ def _run_model_test(model_path: pathlib.Path, test: str, device: str, jit: bool,
                 sys.exit(1)
             from scripts.proper_bs import _run_model_test_proper_bs
             r = _run_model_test_proper_bs(model_path, test, device, args.jit, batch_size=args.bs, extra_args=extra_args)
+        elif args.coverage:
+            from scripts.coverage import _run_model_test_coverage
+            r = _run_model_test_coverage(model_path, test, device, args.jit, batch_size=args.bs, extra_args=extra_args)
         else:
             r = _run_model_test(model_path, test, device, args.jit, batch_size=args.bs, extra_args=extra_args)
         results.append(r)
diff --git a/scripts/coverage.py b/scripts/coverage.py
new file mode 100644
index 0000000000..38ff6d1cc0
--- /dev/null
+++ b/scripts/coverage.py
@@ -0,0 +1,191 @@
+import os
+import pathlib
+import sys
+import time
+from typing import List, Optional, Tuple
+import torch
+import re
+from run_sweep import WORKER_TIMEOUT, WARMUP_ROUNDS, ModelTestResult, NANOSECONDS_PER_MILLISECONDS
+from torchbenchmark import ModelTask
+import numpy
+
+
+def parse_func(func):
+    description = str(func)
+    reg_method = re.compile(r"method (.*) of (.*) object")
+    reg_method2 = re.compile(r"wrapper (.*) of (.*) object")
+    reg_function = re.compile(r"function (.*)[ >]")
+    reg_class = re.compile(r"class (.*)[ >]")
+    reg_generator = re.compile(r"torch._C.Generator object at (.*)")
+    result_method = reg_method.findall(description)
+    result_function = reg_function.findall(description)
+    result_method2 = reg_method2.findall(description)
+    result_class = reg_class.findall(description)
+    result_generator = reg_generator.findall(description)
+    if result_method:
+        func_name = result_method[0][0]
+        module_name = result_method[0][1]
+    elif result_function:
+        func_name = result_function[0].split("at 0x")[0].strip()
+        module_name = ''
+    elif result_method2:
+        func_name = result_method2[0][0]
+        module_name = result_method2[0][1]
+    elif result_class:
+        func_name = result_class[0].split("at 0x")[0].strip()
+        module_name = ''
+    elif result_generator:
+        func_name = 'Generator'
+        module_name = 'torch._C'
+    else:
+        # check if the func has attribute `__module__` and `__name__`
+        if hasattr(func, '__module__'):
+            module_name = func.__module__
+        else:
+            module_name = ''
+        if hasattr(func, '__name__'):
+            func_name = func.__name__
+        else:
+            func_name = ''
+        if module_name != 'torch._ops.profiler':
+            print("not match: ", description)
+    module_name = module_name.replace("'", "")
+    func_name = func_name.replace("'", "")
+    return module_name, func_name
+
+
+def generate_API_list():
+    tmp_api_list = set()
+    tmpb = set(
+        [_ for _ in torch.overrides.get_ignored_functions() if _ not in [True, False]])
+    tmpa = set(torch.overrides.get_testing_overrides().keys())
+    raw_all_apis = tmpa.union(tmpb)
+    # collect all items' attribute `module` to a list
+    for item in raw_all_apis:
+        module_name, func_name = parse_func(item)
+        # if (module_name, func_name) in api_list:
+        #     print("duplicated: ", (module_name, func_name))
+        tmp_api_list.add((module_name, func_name))
+    return tmp_api_list
+
+API_LIST = generate_API_list()
+
+
+class CoverageMode(torch.overrides.TorchFunctionMode):
+
+    def __init__(self, model='', output_file=None):
+        self.model = model
+        self.seen = set()
+        self.api_used = set()
+        self.output_file = output_file
+
+    def check_func_in_APIs(self, func):
+        module_name, func_name = parse_func(func)
+        if (module_name, func_name) not in API_LIST and module_name != 'torch._ops.profiler':
+            print("not in APIs: (%s, %s)" % (module_name, func_name))
+        else:
+            self.api_used.add((module_name, func_name))
+            # debug
+            # print("in APIs: ", (module_name, func_name))
+
+    def get_api_coverage_rate(self):
+        return len(self.api_used) / len(API_LIST)
+
+    def __torch_function__(self, func, types, args=(), kwargs=None):
+        self.seen.add(func)
+        if kwargs is None:
+            kwargs = {}
+        self.check_func_in_APIs(func)
+        return func(*args, **kwargs)
+
+    def commit(self):
+        if self.output_file:
+            with open(self.output_file, 'a') as f:
+                for api in self.api_used:
+                    f.write("%s,%s\n" % (api[0], api[1]))
+
+
+def run_one_step(model, func, device: str, nwarmup=WARMUP_ROUNDS, num_iter=10) -> Tuple[float, Optional[Tuple[torch.Tensor]]]:
+    "Run one step of the model, and return the latency in milliseconds."
+    # Warm-up `nwarmup` rounds
+    for _i in range(nwarmup):
+        func()
+    result_summary = []
+    for _i in range(num_iter):
+        if device == "cuda":
+            torch.cuda.synchronize()
+            # Collect time_ns() instead of time() which does not provide better precision than 1
+            # second according to https://docs.python.org/3/library/time.html#time.time.
+            t0 = time.time_ns()
+            with CoverageMode(model, '/tmp/api_used.csv') as coverage:
+                try:
+                    func()
+                finally:
+                    coverage.commit()
+            torch.cuda.synchronize()  # Wait for the events to be recorded!
+            t1 = time.time_ns()
+        else:
+            t0 = time.time_ns()
+            with CoverageMode(model, '/tmp/api_used.csv') as coverage:
+                try:
+                    func()
+                finally:
+                    coverage.commit()
+            t1 = time.time_ns()
+        result_summary.append((t1 - t0) / NANOSECONDS_PER_MILLISECONDS)
+    wall_latency = numpy.median(result_summary)
+    return wall_latency
+
+
+def _run_model_test_coverage(model_path: pathlib.Path, test: str, device: str, jit: bool, batch_size: Optional[int], extra_args: List[str]) :
+    assert test == "train" or test == "eval", f"Test must be either 'train' or 'eval', but get {test}."
+    result = ModelTestResult(name=model_path.name, test=test, device=device, extra_args=extra_args, batch_size=None, precision="fp32",
+                             status="OK", results={})
+
+    # Run the benchmark test in a separate process
+    print(f"Running model {model_path.name} ... ", end='', flush=True)
+    status: str = "OK"
+    bs_name = "batch_size"
+    correctness_name = "correctness"
+    error_message: Optional[str] = None
+    try:
+        task = ModelTask(os.path.basename(model_path), timeout=WORKER_TIMEOUT)
+        if not task.model_details.exists:
+            status = "NotExist"
+            return
+        task.make_model_instance(test=test, device=device, jit=jit, batch_size=batch_size, extra_args=extra_args)
+        # Check the batch size in the model matches the specified value
+        result.batch_size = task.get_model_attribute(bs_name)
+        result.precision = task.get_model_attribute("dargs", "precision")
+        if batch_size and (not result.batch_size == batch_size):
+            raise ValueError(
+                f"User specify batch size {batch_size}, but model {result.name} runs with batch size {result.batch_size}. Please report a bug.")
+        result.results["latency_ms"] = run_one_step(model_path.name, task.invoke, device)
+        # if NUM_BATCHES is set, update to per-batch latencies
+        num_batches = task.get_model_attribute("NUM_BATCHES")
+        if num_batches:
+            result.results["latency_ms"] = result.results["latency_ms"] / num_batches
+        # if the model provides eager eval result, save it for cosine similarity
+        correctness = task.get_model_attribute(correctness_name)
+        if correctness is not None:
+            result.results[correctness_name] = str(correctness)
+    except NotImplementedError as e:
+        status = "NotImplemented"
+        error_message = str(e)
+    except TypeError as e:  # TypeError is raised when the model doesn't support variable batch sizes
+        status = "TypeError"
+        error_message = str(e)
+    except KeyboardInterrupt as e:
+        status = "UserInterrupted"
+        error_message = str(e)
+    except Exception as e:
+        status = f"{type(e).__name__}"
+        error_message = str(e)
+    finally:
+        print(f"[ {status} ]")
+        result.status = status
+        if error_message:
+            result.results["error_message"] = error_message
+        if status == "UserInterrupted":
+            sys.exit(1)
+        return result
From 5ee87c2316bfbc33031a5ee200b3f53359548174 Mon Sep 17 00:00:00 2001
From: FindHao
Date: Thu, 2 Feb 2023 16:54:49 -0500
Subject: [PATCH 02/10] create a new userbenchmark

---
 run.py                                 |  14 +-
 run_sweep.py                           |   4 -
 scripts/coverage.py                    | 191 ----------------------
 userbenchmark/api-coverage/__init__.py | 218 +++++++++++++++++++++++++
 4 files changed, 221 insertions(+), 206 deletions(-)
 delete mode 100644 scripts/coverage.py
 create mode 100644 userbenchmark/api-coverage/__init__.py

diff --git a/run.py b/run.py
index 4e07833a8f..1cbe76d308 100644
--- a/run.py
+++ b/run.py
@@ -15,7 +15,7 @@
 from torchbenchmark import load_model_by_name
 
 import torch
-from scripts.coverage import CoverageMode
+
 WARMUP_ROUNDS = 3
 SUPPORT_DEVICE_LIST = ["cpu", "cuda"]
 if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
@@
-142,22 +142,14 @@ def run_one_step(func, nwarmup=WARMUP_ROUNDS, num_iter=10, model=None, export_me # second according to https://docs.python.org/3/library/time.html#time.time. t0 = time.time_ns() start_event.record() - with CoverageMode(args.model, '/tmp/api_used.csv') as coverage: - try: - func() - finally: - coverage.commit() + func() end_event.record() torch.cuda.synchronize() t1 = time.time_ns() result_summary.append((start_event.elapsed_time(end_event), (t1 - t0) / 1_000_000)) elif args.device == "mps": t0 = time.time_ns() - with CoverageMode(args.model, '/tmp/api_used.csv') as coverage: - try: - func() - finally: - coverage.commit() + func() t1 = time.time_ns() wall_latency = t1 - t0 # TODO: modify this to add GPU time as well diff --git a/run_sweep.py b/run_sweep.py index 851bd8a623..e9ba079186 100644 --- a/run_sweep.py +++ b/run_sweep.py @@ -141,7 +141,6 @@ def _run_model_test(model_path: pathlib.Path, test: str, device: str, jit: bool, parser.add_argument("--jit", action='store_true', help="Turn on torchscript.") parser.add_argument("-o", "--output", type=str, default="tb-output.json", help="The default output json file.") parser.add_argument("--proper-bs", action='store_true', help="Find the best batch_size for current devices.") - parser.add_argument("--coverage", action='store_true', help="Test API coverage.") args, extra_args = parser.parse_known_args() args.models = _list_model_paths(args.models) results = [] @@ -153,9 +152,6 @@ def _run_model_test(model_path: pathlib.Path, test: str, device: str, jit: bool, sys.exit(1) from scripts.proper_bs import _run_model_test_proper_bs r = _run_model_test_proper_bs(model_path, test, device, args.jit, batch_size=args.bs, extra_args=extra_args) - elif args.coverage: - from scripts.coverage import _run_model_test_coverage - r = _run_model_test_coverage(model_path, test, device, args.jit, batch_size=args.bs, extra_args=extra_args) else: r = _run_model_test(model_path, test, device, args.jit, batch_size=args.bs, extra_args=extra_args) results.append(r) diff --git a/scripts/coverage.py b/scripts/coverage.py deleted file mode 100644 index 38ff6d1cc0..0000000000 --- a/scripts/coverage.py +++ /dev/null @@ -1,191 +0,0 @@ -import os -import pathlib -import sys -import time -from typing import List, Optional, Tuple -import torch -import re -from run_sweep import WORKER_TIMEOUT, WARMUP_ROUNDS, ModelTestResult, NANOSECONDS_PER_MILLISECONDS -from torchbenchmark import ModelTask -import numpy - - -def parse_func(func): - description = str(func) - reg_method = re.compile(r"method (.*) of (.*) object") - reg_method2 = re.compile(r"wrapper (.*) of (.*) object") - reg_function = re.compile(r"function (.*)[ >]") - reg_class = re.compile(r"class (.*)[ >]") - reg_generator = re.compile(r"torch._C.Generator object at (.*)") - result_method = reg_method.findall(description) - result_function = reg_function.findall(description) - result_method2 = reg_method2.findall(description) - result_class = reg_class.findall(description) - result_generator = reg_generator.findall(description) - if result_method: - func_name = result_method[0][0] - module_name = result_method[0][1] - elif result_function: - func_name = result_function[0].split("at 0x")[0].strip() - module_name = '' - elif result_method2: - func_name = result_method2[0][0] - module_name = result_method2[0][1] - elif result_class: - func_name = result_class[0].split("at 0x")[0].strip() - module_name = '' - elif result_generator: - func_name = 'Generator' - module_name = 'torch._C' - else: - # check if the 
func has attribute `__module__` and `__name__` - if hasattr(func, '__module__'): - module_name = func.__module__ - else: - module_name = '' - if hasattr(func, '__name__'): - func_name = func.__name__ - else: - func_name = '' - if module_name != 'torch._ops.profiler': - print("not match: ", description) - module_name = module_name.replace("'", "") - func_name = func_name.replace("'", "") - return module_name, func_name - - -def generate_API_list(): - tmp_api_list = set() - tmpb = set( - [_ for _ in torch.overrides.get_ignored_functions() if _ not in [True, False]]) - tmpa = set(torch.overrides.get_testing_overrides().keys()) - raw_all_apis = tmpa.union(tmpb) - # collect all items' attribute `module` to a list - for item in raw_all_apis: - module_name, func_name = parse_func(item) - # if (module_name, func_name) in api_list: - # print("duplicated: ", (module_name, func_name)) - tmp_api_list.add((module_name, func_name)) - return tmp_api_list - -API_LIST = generate_API_list() - - -class CoverageMode(torch.overrides.TorchFunctionMode): - - def __init__(self, model='', output_file=None): - self.model = model - self.seen = set() - self.api_used = set() - self.output_file = output_file - - def check_func_in_APIs(self, func): - module_name, func_name = parse_func(func) - if (module_name, func_name) not in API_LIST and module_name != 'torch._ops.profiler': - print("not in APIs: (%s, %s)" % (module_name, func_name)) - else: - self.api_used.add((module_name, func_name)) - # debug - # print("in APIs: ", (module_name, func_name)) - - def get_api_coverage_rate(self): - return len(self.api_used) / len(API_LIST) - - def __torch_function__(self, func, types, args=(), kwargs=None): - self.seen.add(func) - if kwargs is None: - kwargs = {} - self.check_func_in_APIs(func) - return func(*args, **kwargs) - - def commit(self): - if self.output_file: - with open(self.output_file, 'a') as f: - for api in self.api_used: - f.write("%s,%s\n" % (api[0], api[1])) - - -def run_one_step(model, func, device: str, nwarmup=WARMUP_ROUNDS, num_iter=10) -> Tuple[float, Optional[Tuple[torch.Tensor]]]: - "Run one step of the model, and return the latency in milliseconds." - # Warm-up `nwarmup` rounds - for _i in range(nwarmup): - func() - result_summary = [] - for _i in range(num_iter): - if device == "cuda": - torch.cuda.synchronize() - # Collect time_ns() instead of time() which does not provide better precision than 1 - # second according to https://docs.python.org/3/library/time.html#time.time. - t0 = time.time_ns() - with CoverageMode(model, '/tmp/api_used.csv') as coverage: - try: - func() - finally: - coverage.commit() - torch.cuda.synchronize() # Wait for the events to be recorded! - t1 = time.time_ns() - else: - t0 = time.time_ns() - with CoverageMode(model, '/tmp/api_used.csv') as coverage: - try: - func() - finally: - coverage.commit() - t1 = time.time_ns() - result_summary.append((t1 - t0) / NANOSECONDS_PER_MILLISECONDS) - wall_latency = numpy.median(result_summary) - return wall_latency - - -def _run_model_test_coverage(model_path: pathlib.Path, test: str, device: str, jit: bool, batch_size: Optional[int], extra_args: List[str]) : - assert test == "train" or test == "eval", f"Test must be either 'train' or 'eval', but get {test}." - result = ModelTestResult(name=model_path.name, test=test, device=device, extra_args=extra_args, batch_size=None, precision="fp32", - status="OK", results={}) - - # Run the benchmark test in a separate process - print(f"Running model {model_path.name} ... 
", end='', flush=True) - status: str = "OK" - bs_name = "batch_size" - correctness_name = "correctness" - error_message: Optional[str] = None - try: - task = ModelTask(os.path.basename(model_path), timeout=WORKER_TIMEOUT) - if not task.model_details.exists: - status = "NotExist" - return - task.make_model_instance(test=test, device=device, jit=jit, batch_size=batch_size, extra_args=extra_args) - # Check the batch size in the model matches the specified value - result.batch_size = task.get_model_attribute(bs_name) - result.precision = task.get_model_attribute("dargs", "precision") - if batch_size and (not result.batch_size == batch_size): - raise ValueError( - f"User specify batch size {batch_size}, but model {result.name} runs with batch size {result.batch_size}. Please report a bug.") - result.results["latency_ms"] = run_one_step(model_path.name, task.invoke, device) - # if NUM_BATCHES is set, update to per-batch latencies - num_batches = task.get_model_attribute("NUM_BATCHES") - if num_batches: - result.results["latency_ms"] = result.results["latency_ms"] / num_batches - # if the model provides eager eval result, save it for cosine similarity - correctness = task.get_model_attribute(correctness_name) - if correctness is not None: - result.results[correctness_name] = str(correctness) - except NotImplementedError as e: - status = "NotImplemented" - error_message = str(e) - except TypeError as e: # TypeError is raised when the model doesn't support variable batch sizes - status = "TypeError" - error_message = str(e) - except KeyboardInterrupt as e: - status = "UserInterrupted" - error_message = str(e) - except Exception as e: - status = f"{type(e).__name__}" - error_message = str(e) - finally: - print(f"[ {status} ]") - result.status = status - if error_message: - result.results["error_message"] = error_message - if status == "UserInterrupted": - sys.exit(1) - return result diff --git a/userbenchmark/api-coverage/__init__.py b/userbenchmark/api-coverage/__init__.py new file mode 100644 index 0000000000..fce09257a6 --- /dev/null +++ b/userbenchmark/api-coverage/__init__.py @@ -0,0 +1,218 @@ +import itertools +import time +from datetime import datetime +from typing import List +import json +import numpy as np +import argparse +import re +import torch + +from ..utils import REPO_PATH, add_path, get_output_dir, get_output_json, dump_output + +with add_path(REPO_PATH): + from torchbenchmark.util.experiment.instantiator import list_models, load_model, TorchBenchModelConfig + from torchbenchmark.util.experiment.metrics import TorchBenchModelMetrics, get_model_test_metrics + +BM_NAME = "api-coverage" + + +def parse_func(func): + description = str(func) + reg_method = re.compile(r"method (.*) of (.*) object") + reg_method2 = re.compile(r"wrapper (.*) of (.*) object") + reg_function = re.compile(r"function (.*)[ >]") + reg_class = re.compile(r"class (.*)[ >]") + reg_generator = re.compile(r"torch._C.Generator object at (.*)") + result_method = reg_method.findall(description) + result_function = reg_function.findall(description) + result_method2 = reg_method2.findall(description) + result_class = reg_class.findall(description) + result_generator = reg_generator.findall(description) + if result_method: + func_name = result_method[0][0] + module_name = result_method[0][1] + elif result_function: + func_name = result_function[0].split("at 0x")[0].strip() + module_name = '' + elif result_method2: + func_name = result_method2[0][0] + module_name = result_method2[0][1] + elif result_class: + func_name = 
result_class[0].split("at 0x")[0].strip() + module_name = '' + elif result_generator: + func_name = 'Generator' + module_name = 'torch._C' + else: + # check if the func has attribute `__module__` and `__name__` + if hasattr(func, '__module__'): + module_name = func.__module__ + else: + module_name = '' + if hasattr(func, '__name__'): + func_name = func.__name__ + else: + func_name = '' + if module_name != 'torch._ops.profiler': + print("not match: ", description) + module_name = module_name.replace("'", "") + func_name = func_name.replace("'", "") + return module_name, func_name + + +def generate_API_list(): + tmp_api_list = set() + raw_all_apis = set(torch.overrides.get_testing_overrides().keys()) + # collect all items' attribute `module` to a list + for item in raw_all_apis: + module_name, func_name = parse_func(item) + # if (module_name, func_name) in api_list: + # print("duplicated: ", (module_name, func_name)) + tmp_api_list.add((module_name, func_name)) + ignored_funcs = set([_ for _ in torch.overrides.get_ignored_functions() if _ not in [True, False]]) + tmp_ignored_api_list = set() + for item in ignored_funcs: + module_name, func_name = parse_func(item) + tmp_ignored_api_list.add((module_name, func_name)) + return tmp_api_list, tmp_ignored_api_list + +API_LIST, IGNORED_API_LIST = generate_API_list() + + +class CoverageMode(torch.overrides.TorchFunctionMode): + + def __init__(self, model='', output_file=None): + self.model = model + self.seen = set() + self.api_used = set() + self.output_file = output_file + + def check_func_in_APIs(self, func): + module_name, func_name = parse_func(func) + if (module_name, func_name) not in API_LIST and (module_name, func_name) not in IGNORED_API_LIST and module_name != 'torch._ops.profiler': + raise RuntimeError("not in APIs: (%s, %s)" % (module_name, func_name)) + print("not in APIs: (%s, %s)" % (module_name, func_name)) + else: + self.api_used.add((module_name, func_name)) + # debug + # print("in APIs: ", (module_name, func_name)) + + def get_api_coverage_rate(self): + return len(self.api_used) / len(API_LIST) + + def __torch_function__(self, func, types, args=(), kwargs=None): + self.seen.add(func) + if kwargs is None: + kwargs = {} + self.check_func_in_APIs(func) + return func(*args, **kwargs) + + def commit(self): + if self.output_file: + with open(self.output_file, 'a') as f: + for api in self.api_used: + f.write("%s,%s\n" % (api[0], api[1])) + + def update_output(self, output: set): + for api in self.api_used: + output.add(api) + + +def generate_model_config(model_name: str) -> List[TorchBenchModelConfig]: + devices = ["cpu", "cuda"] + tests = ["train", "eval"] + cfgs = itertools.product(*[devices, tests]) + result = [TorchBenchModelConfig( + name=model_name, + device=device, + test=test, + batch_size=None, + jit=False, + extra_args=[], + extra_env=None, + ) for device, test in cfgs] + return result + + +def parse_args(args: List[str]): + parser = argparse.ArgumentParser() + parser.add_argument("-m", "--models", default="", + help="Specify the models to run, default (empty) runs all models.") + parser.add_argument("-d", "--device", default="cuda", help="Specify the device.") + parser.add_argument("-t", "--test", default="eval", help="Specify the test.") + parser.add_argument("-o", "--output", type=str, help="The default output json file.") + args = parser.parse_args(args) + return args + + +def generate_filter(args: argparse.Namespace): + allowed_models = args.models + if allowed_models: + allowed_models = allowed_models.split(",") if "," 
in allowed_models else [allowed_models] + allowed_devices = args.device + allowed_devices = allowed_devices.split(",") if "," in allowed_devices else [allowed_devices] + allowed_tests = args.test + allowed_tests = allowed_tests.split(",") if "," in allowed_tests else [allowed_tests] + + def cfg_filter(cfg: TorchBenchModelConfig) -> bool: + if cfg.device in allowed_devices and cfg.test in allowed_tests: + if not allowed_models: + return True + else: + return cfg.name in allowed_models + return False + return cfg_filter + + +def run(args: List[str]): + args = parse_args(args) + output_dir = get_output_dir(BM_NAME) + models = list_models() + cfgs = list(itertools.chain(*map(generate_model_config, models))) + cfg_filter = generate_filter(args) + single_round_result = [] + api_used = set() + for cfg in filter(cfg_filter, cfgs): + try: + print(cfg.name) + # if cfg.name in ['doctr_det_predictor', 'doctr_reco_predictor']: + # continue + # load the model instance within the same process + model = load_model(cfg) + # get the model test metrics + with CoverageMode('', '') as coverage: + try: + get_model_test_metrics(model) + finally: + coverage.update_output(api_used) + except NotImplementedError: + # some models don't implement the test specified + single_round_result.append({ + 'cfg': cfg.__dict__, + 'raw_metrics': "NotImplemented", + }) + except RuntimeError as e: + single_round_result.append({ + 'cfg': cfg.__dict__, + 'raw_metrics': f"RuntimeError: {e}", + }) + + # reduce full results to metrics + # log detailed results in the .userbenchmark/model-stableness/logs/ directory + log_dir = output_dir.joinpath("logs") + log_dir.mkdir(exist_ok=True, parents=True) + fname = "logs-{}.json".format(datetime.fromtimestamp(time.time()).strftime("%Y%m%d%H%M%S")) + full_fname = log_dir.joinpath(fname) + with open(full_fname, 'w') as f: + json.dump(single_round_result, f, indent=4) + # log the api coverage + api_coverage_fname = log_dir.joinpath("%s-api_coverage.csv" % fname) + missed_apis = API_LIST - api_used + with open(api_coverage_fname, 'w') as f: + f.write("API coverage rate: %d/%d = %.2f%%\n" % + (len(api_used), len(API_LIST), len(api_used) / len(API_LIST) * 100)) + f.write("missed APIs:\n") + f.write("module_name,func_name\n") + for api in missed_apis: + f.write("%s,%s\n" % (api[0], api[1])) From 4033206d8dc5eebe81259605847cb5ff8ec023bc Mon Sep 17 00:00:00 2001 From: FindHao Date: Thu, 2 Feb 2023 16:58:10 -0500 Subject: [PATCH 03/10] enable train,eval for cuda,cpu by default --- userbenchmark/api-coverage/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/userbenchmark/api-coverage/__init__.py b/userbenchmark/api-coverage/__init__.py index fce09257a6..758caa85b4 100644 --- a/userbenchmark/api-coverage/__init__.py +++ b/userbenchmark/api-coverage/__init__.py @@ -139,8 +139,8 @@ def parse_args(args: List[str]): parser = argparse.ArgumentParser() parser.add_argument("-m", "--models", default="", help="Specify the models to run, default (empty) runs all models.") - parser.add_argument("-d", "--device", default="cuda", help="Specify the device.") - parser.add_argument("-t", "--test", default="eval", help="Specify the test.") + parser.add_argument("-d", "--device", default="cuda,cpu", help="Specify the device.") + parser.add_argument("-t", "--test", default="eval,train", help="Specify the test.") parser.add_argument("-o", "--output", type=str, help="The default output json file.") args = parser.parse_args(args) return args From 03b9651065aaf3907984f2ad4c00a9ca5fc27115 Mon Sep 
17 00:00:00 2001 From: FindHao Date: Thu, 2 Feb 2023 17:01:45 -0500 Subject: [PATCH 04/10] set execution iterations to 1 --- userbenchmark/api-coverage/__init__.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/userbenchmark/api-coverage/__init__.py b/userbenchmark/api-coverage/__init__.py index 758caa85b4..eb430d4f82 100644 --- a/userbenchmark/api-coverage/__init__.py +++ b/userbenchmark/api-coverage/__init__.py @@ -13,6 +13,7 @@ with add_path(REPO_PATH): from torchbenchmark.util.experiment.instantiator import list_models, load_model, TorchBenchModelConfig from torchbenchmark.util.experiment.metrics import TorchBenchModelMetrics, get_model_test_metrics + import torchbenchmark.util.experiment.metrics BM_NAME = "api-coverage" @@ -171,11 +172,13 @@ def run(args: List[str]): models = list_models() cfgs = list(itertools.chain(*map(generate_model_config, models))) cfg_filter = generate_filter(args) + torchbenchmark.util.experiment.metrics.BENCHMARK_ITERS = 1 + torchbenchmark.util.experiment.metrics.WARMUP_ROUNDS = 0 single_round_result = [] api_used = set() for cfg in filter(cfg_filter, cfgs): try: - print(cfg.name) + # print(cfg.name) # if cfg.name in ['doctr_det_predictor', 'doctr_reco_predictor']: # continue # load the model instance within the same process From 8814834d126a16b8880143396e797bc833b35f07 Mon Sep 17 00:00:00 2001 From: FindHao Date: Thu, 2 Feb 2023 18:11:14 -0500 Subject: [PATCH 05/10] user a better way to parse function --- userbenchmark/api-coverage/__init__.py | 51 ++++++-------------------- 1 file changed, 12 insertions(+), 39 deletions(-) diff --git a/userbenchmark/api-coverage/__init__.py b/userbenchmark/api-coverage/__init__.py index eb430d4f82..a1ef4b5f9b 100644 --- a/userbenchmark/api-coverage/__init__.py +++ b/userbenchmark/api-coverage/__init__.py @@ -19,46 +19,19 @@ def parse_func(func): - description = str(func) - reg_method = re.compile(r"method (.*) of (.*) object") - reg_method2 = re.compile(r"wrapper (.*) of (.*) object") - reg_function = re.compile(r"function (.*)[ >]") - reg_class = re.compile(r"class (.*)[ >]") - reg_generator = re.compile(r"torch._C.Generator object at (.*)") - result_method = reg_method.findall(description) - result_function = reg_function.findall(description) - result_method2 = reg_method2.findall(description) - result_class = reg_class.findall(description) - result_generator = reg_generator.findall(description) - if result_method: - func_name = result_method[0][0] - module_name = result_method[0][1] - elif result_function: - func_name = result_function[0].split("at 0x")[0].strip() - module_name = '' - elif result_method2: - func_name = result_method2[0][0] - module_name = result_method2[0][1] - elif result_class: - func_name = result_class[0].split("at 0x")[0].strip() - module_name = '' - elif result_generator: - func_name = 'Generator' - module_name = 'torch._C' + if hasattr(func, '__module__'): + module_name = func.__module__ + func_name = func.__name__ else: - # check if the func has attribute `__module__` and `__name__` - if hasattr(func, '__module__'): - module_name = func.__module__ - else: + if hasattr(func, '__qualname__'): + func_name = func.__qualname__ module_name = '' - if hasattr(func, '__name__'): - func_name = func.__name__ else: - func_name = '' - if module_name != 'torch._ops.profiler': - print("not match: ", description) - module_name = module_name.replace("'", "") - func_name = func_name.replace("'", "") + if type(func) == torch._C.Generator: + func_name = 'torch._C.Generator' + module_name = '' 
+ else: + raise RuntimeError("no matched moduel and func name: ", func, type(func)) return module_name, func_name @@ -179,8 +152,8 @@ def run(args: List[str]): for cfg in filter(cfg_filter, cfgs): try: # print(cfg.name) - # if cfg.name in ['doctr_det_predictor', 'doctr_reco_predictor']: - # continue + if cfg.name in ['doctr_det_predictor', 'doctr_reco_predictor']: + continue # load the model instance within the same process model = load_model(cfg) # get the model test metrics From dc6caaf8e62ed087eb3d119dda924ffab190cdc5 Mon Sep 17 00:00:00 2001 From: FindHao Date: Fri, 3 Feb 2023 10:43:34 -0500 Subject: [PATCH 06/10] fix typo and remove debug tests --- userbenchmark/api-coverage/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/userbenchmark/api-coverage/__init__.py b/userbenchmark/api-coverage/__init__.py index a1ef4b5f9b..2353017fe0 100644 --- a/userbenchmark/api-coverage/__init__.py +++ b/userbenchmark/api-coverage/__init__.py @@ -31,7 +31,7 @@ def parse_func(func): func_name = 'torch._C.Generator' module_name = '' else: - raise RuntimeError("no matched moduel and func name: ", func, type(func)) + raise RuntimeError("no matched module and func name: ", func, type(func)) return module_name, func_name @@ -152,8 +152,8 @@ def run(args: List[str]): for cfg in filter(cfg_filter, cfgs): try: # print(cfg.name) - if cfg.name in ['doctr_det_predictor', 'doctr_reco_predictor']: - continue + # if cfg.name in ['doctr_det_predictor', 'doctr_reco_predictor']: + # continue # load the model instance within the same process model = load_model(cfg) # get the model test metrics From e3c8bb390c6eaa7a2ca76db0d134903784b90afc Mon Sep 17 00:00:00 2001 From: FindHao Date: Mon, 6 Feb 2023 10:54:09 -0500 Subject: [PATCH 07/10] fix bug for api coverage computation --- userbenchmark/api-coverage/__init__.py | 33 +++++++++++++++++++------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/userbenchmark/api-coverage/__init__.py b/userbenchmark/api-coverage/__init__.py index 2353017fe0..0414fe65ee 100644 --- a/userbenchmark/api-coverage/__init__.py +++ b/userbenchmark/api-coverage/__init__.py @@ -61,12 +61,14 @@ def __init__(self, model='', output_file=None): self.seen = set() self.api_used = set() self.output_file = output_file + self.api_need_support = set() def check_func_in_APIs(self, func): module_name, func_name = parse_func(func) - if (module_name, func_name) not in API_LIST and (module_name, func_name) not in IGNORED_API_LIST and module_name != 'torch._ops.profiler': - raise RuntimeError("not in APIs: (%s, %s)" % (module_name, func_name)) - print("not in APIs: (%s, %s)" % (module_name, func_name)) + if (module_name, func_name) not in API_LIST: + if (module_name, func_name) not in IGNORED_API_LIST and module_name != 'torch._ops.profiler': + print("not in API_LIST or IGNORED_API_LIST: (%s, %s)" % (module_name, func_name)) + self.api_need_support.add((module_name, func_name)) else: self.api_used.add((module_name, func_name)) # debug @@ -88,9 +90,13 @@ def commit(self): for api in self.api_used: f.write("%s,%s\n" % (api[0], api[1])) - def update_output(self, output: set): + def update_api_used(self, output: set): for api in self.api_used: output.add(api) + + def update_need_support(self, output: set): + for api in self.api_need_support: + output.add(api) def generate_model_config(model_name: str) -> List[TorchBenchModelConfig]: @@ -113,7 +119,7 @@ def parse_args(args: List[str]): parser = argparse.ArgumentParser() parser.add_argument("-m", "--models", default="", 
help="Specify the models to run, default (empty) runs all models.") - parser.add_argument("-d", "--device", default="cuda,cpu", help="Specify the device.") + parser.add_argument("-d", "--device", default="cpu", help="Specify the device.") parser.add_argument("-t", "--test", default="eval,train", help="Specify the test.") parser.add_argument("-o", "--output", type=str, help="The default output json file.") args = parser.parse_args(args) @@ -149,11 +155,12 @@ def run(args: List[str]): torchbenchmark.util.experiment.metrics.WARMUP_ROUNDS = 0 single_round_result = [] api_used = set() + api_need_support = set() for cfg in filter(cfg_filter, cfgs): try: - # print(cfg.name) - # if cfg.name in ['doctr_det_predictor', 'doctr_reco_predictor']: - # continue + print(cfg.name) + if cfg.name in ['doctr_det_predictor', 'doctr_reco_predictor', 'densenet121', 'resnet152']: + continue # load the model instance within the same process model = load_model(cfg) # get the model test metrics @@ -161,7 +168,8 @@ def run(args: List[str]): try: get_model_test_metrics(model) finally: - coverage.update_output(api_used) + coverage.update_api_used(api_used) + coverage.update_need_support(api_need_support) except NotImplementedError: # some models don't implement the test specified single_round_result.append({ @@ -192,3 +200,10 @@ def run(args: List[str]): f.write("module_name,func_name\n") for api in missed_apis: f.write("%s,%s\n" % (api[0], api[1])) + if api_need_support: + api_need_support_fname = log_dir.joinpath("%s-api_need_support.csv" % fname) + with open(api_need_support_fname, 'w') as f: + f.write("APIs called but not in API_LIST and IGNORED_API_LIST\n") + f.write("module_name,func_name\n") + for api in api_need_support: + f.write("%s,%s\n" % (api[0], api[1])) From b61be7bea8d4bf31bb3781a9b0d17c2f9a055608 Mon Sep 17 00:00:00 2001 From: FindHao Date: Mon, 6 Feb 2023 14:38:18 -0500 Subject: [PATCH 08/10] remove duplicate output --- userbenchmark/api-coverage/__init__.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/userbenchmark/api-coverage/__init__.py b/userbenchmark/api-coverage/__init__.py index 0414fe65ee..ab11d6ef3f 100644 --- a/userbenchmark/api-coverage/__init__.py +++ b/userbenchmark/api-coverage/__init__.py @@ -67,8 +67,10 @@ def check_func_in_APIs(self, func): module_name, func_name = parse_func(func) if (module_name, func_name) not in API_LIST: if (module_name, func_name) not in IGNORED_API_LIST and module_name != 'torch._ops.profiler': - print("not in API_LIST or IGNORED_API_LIST: (%s, %s)" % (module_name, func_name)) - self.api_need_support.add((module_name, func_name)) + new_pair = (module_name, func_name) + if new_pair not in self.api_need_support: + print("not in API_LIST or IGNORED_API_LIST: (%s, %s)" % (module_name, func_name)) + self.api_need_support.add((module_name, func_name)) else: self.api_used.add((module_name, func_name)) # debug From 6c002f5fd20c48c7c37f7f341254e4c5aff9c5d7 Mon Sep 17 00:00:00 2001 From: Yueming Hao Date: Wed, 8 Feb 2023 09:47:07 -0500 Subject: [PATCH 09/10] remove debug code --- userbenchmark/api-coverage/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/userbenchmark/api-coverage/__init__.py b/userbenchmark/api-coverage/__init__.py index ab11d6ef3f..002226a105 100644 --- a/userbenchmark/api-coverage/__init__.py +++ b/userbenchmark/api-coverage/__init__.py @@ -161,8 +161,8 @@ def run(args: List[str]): for cfg in filter(cfg_filter, cfgs): try: print(cfg.name) - if cfg.name in ['doctr_det_predictor', 
'doctr_reco_predictor', 'densenet121', 'resnet152']: - continue + # if cfg.name in ['doctr_det_predictor', 'doctr_reco_predictor', 'densenet121', 'resnet152']: + # continue # load the model instance within the same process model = load_model(cfg) # get the model test metrics From 27ae23898b77edfa0898b78b6cf165daf9f5c362 Mon Sep 17 00:00:00 2001 From: FindHao Date: Fri, 14 Jul 2023 13:27:39 -0700 Subject: [PATCH 10/10] rebase --- userbenchmark/api-coverage/__init__.py | 210 ------------------------ userbenchmark/api-coverage/run.py | 214 +++++++++++++++++++++++++ 2 files changed, 214 insertions(+), 210 deletions(-) create mode 100644 userbenchmark/api-coverage/run.py diff --git a/userbenchmark/api-coverage/__init__.py b/userbenchmark/api-coverage/__init__.py index 002226a105..8b13789179 100644 --- a/userbenchmark/api-coverage/__init__.py +++ b/userbenchmark/api-coverage/__init__.py @@ -1,211 +1 @@ -import itertools -import time -from datetime import datetime -from typing import List -import json -import numpy as np -import argparse -import re -import torch -from ..utils import REPO_PATH, add_path, get_output_dir, get_output_json, dump_output - -with add_path(REPO_PATH): - from torchbenchmark.util.experiment.instantiator import list_models, load_model, TorchBenchModelConfig - from torchbenchmark.util.experiment.metrics import TorchBenchModelMetrics, get_model_test_metrics - import torchbenchmark.util.experiment.metrics - -BM_NAME = "api-coverage" - - -def parse_func(func): - if hasattr(func, '__module__'): - module_name = func.__module__ - func_name = func.__name__ - else: - if hasattr(func, '__qualname__'): - func_name = func.__qualname__ - module_name = '' - else: - if type(func) == torch._C.Generator: - func_name = 'torch._C.Generator' - module_name = '' - else: - raise RuntimeError("no matched module and func name: ", func, type(func)) - return module_name, func_name - - -def generate_API_list(): - tmp_api_list = set() - raw_all_apis = set(torch.overrides.get_testing_overrides().keys()) - # collect all items' attribute `module` to a list - for item in raw_all_apis: - module_name, func_name = parse_func(item) - # if (module_name, func_name) in api_list: - # print("duplicated: ", (module_name, func_name)) - tmp_api_list.add((module_name, func_name)) - ignored_funcs = set([_ for _ in torch.overrides.get_ignored_functions() if _ not in [True, False]]) - tmp_ignored_api_list = set() - for item in ignored_funcs: - module_name, func_name = parse_func(item) - tmp_ignored_api_list.add((module_name, func_name)) - return tmp_api_list, tmp_ignored_api_list - -API_LIST, IGNORED_API_LIST = generate_API_list() - - -class CoverageMode(torch.overrides.TorchFunctionMode): - - def __init__(self, model='', output_file=None): - self.model = model - self.seen = set() - self.api_used = set() - self.output_file = output_file - self.api_need_support = set() - - def check_func_in_APIs(self, func): - module_name, func_name = parse_func(func) - if (module_name, func_name) not in API_LIST: - if (module_name, func_name) not in IGNORED_API_LIST and module_name != 'torch._ops.profiler': - new_pair = (module_name, func_name) - if new_pair not in self.api_need_support: - print("not in API_LIST or IGNORED_API_LIST: (%s, %s)" % (module_name, func_name)) - self.api_need_support.add((module_name, func_name)) - else: - self.api_used.add((module_name, func_name)) - # debug - # print("in APIs: ", (module_name, func_name)) - - def get_api_coverage_rate(self): - return len(self.api_used) / len(API_LIST) - - def 
__torch_function__(self, func, types, args=(), kwargs=None): - self.seen.add(func) - if kwargs is None: - kwargs = {} - self.check_func_in_APIs(func) - return func(*args, **kwargs) - - def commit(self): - if self.output_file: - with open(self.output_file, 'a') as f: - for api in self.api_used: - f.write("%s,%s\n" % (api[0], api[1])) - - def update_api_used(self, output: set): - for api in self.api_used: - output.add(api) - - def update_need_support(self, output: set): - for api in self.api_need_support: - output.add(api) - - -def generate_model_config(model_name: str) -> List[TorchBenchModelConfig]: - devices = ["cpu", "cuda"] - tests = ["train", "eval"] - cfgs = itertools.product(*[devices, tests]) - result = [TorchBenchModelConfig( - name=model_name, - device=device, - test=test, - batch_size=None, - jit=False, - extra_args=[], - extra_env=None, - ) for device, test in cfgs] - return result - - -def parse_args(args: List[str]): - parser = argparse.ArgumentParser() - parser.add_argument("-m", "--models", default="", - help="Specify the models to run, default (empty) runs all models.") - parser.add_argument("-d", "--device", default="cpu", help="Specify the device.") - parser.add_argument("-t", "--test", default="eval,train", help="Specify the test.") - parser.add_argument("-o", "--output", type=str, help="The default output json file.") - args = parser.parse_args(args) - return args - - -def generate_filter(args: argparse.Namespace): - allowed_models = args.models - if allowed_models: - allowed_models = allowed_models.split(",") if "," in allowed_models else [allowed_models] - allowed_devices = args.device - allowed_devices = allowed_devices.split(",") if "," in allowed_devices else [allowed_devices] - allowed_tests = args.test - allowed_tests = allowed_tests.split(",") if "," in allowed_tests else [allowed_tests] - - def cfg_filter(cfg: TorchBenchModelConfig) -> bool: - if cfg.device in allowed_devices and cfg.test in allowed_tests: - if not allowed_models: - return True - else: - return cfg.name in allowed_models - return False - return cfg_filter - - -def run(args: List[str]): - args = parse_args(args) - output_dir = get_output_dir(BM_NAME) - models = list_models() - cfgs = list(itertools.chain(*map(generate_model_config, models))) - cfg_filter = generate_filter(args) - torchbenchmark.util.experiment.metrics.BENCHMARK_ITERS = 1 - torchbenchmark.util.experiment.metrics.WARMUP_ROUNDS = 0 - single_round_result = [] - api_used = set() - api_need_support = set() - for cfg in filter(cfg_filter, cfgs): - try: - print(cfg.name) - # if cfg.name in ['doctr_det_predictor', 'doctr_reco_predictor', 'densenet121', 'resnet152']: - # continue - # load the model instance within the same process - model = load_model(cfg) - # get the model test metrics - with CoverageMode('', '') as coverage: - try: - get_model_test_metrics(model) - finally: - coverage.update_api_used(api_used) - coverage.update_need_support(api_need_support) - except NotImplementedError: - # some models don't implement the test specified - single_round_result.append({ - 'cfg': cfg.__dict__, - 'raw_metrics': "NotImplemented", - }) - except RuntimeError as e: - single_round_result.append({ - 'cfg': cfg.__dict__, - 'raw_metrics': f"RuntimeError: {e}", - }) - - # reduce full results to metrics - # log detailed results in the .userbenchmark/model-stableness/logs/ directory - log_dir = output_dir.joinpath("logs") - log_dir.mkdir(exist_ok=True, parents=True) - fname = 
"logs-{}.json".format(datetime.fromtimestamp(time.time()).strftime("%Y%m%d%H%M%S")) - full_fname = log_dir.joinpath(fname) - with open(full_fname, 'w') as f: - json.dump(single_round_result, f, indent=4) - # log the api coverage - api_coverage_fname = log_dir.joinpath("%s-api_coverage.csv" % fname) - missed_apis = API_LIST - api_used - with open(api_coverage_fname, 'w') as f: - f.write("API coverage rate: %d/%d = %.2f%%\n" % - (len(api_used), len(API_LIST), len(api_used) / len(API_LIST) * 100)) - f.write("missed APIs:\n") - f.write("module_name,func_name\n") - for api in missed_apis: - f.write("%s,%s\n" % (api[0], api[1])) - if api_need_support: - api_need_support_fname = log_dir.joinpath("%s-api_need_support.csv" % fname) - with open(api_need_support_fname, 'w') as f: - f.write("APIs called but not in API_LIST and IGNORED_API_LIST\n") - f.write("module_name,func_name\n") - for api in api_need_support: - f.write("%s,%s\n" % (api[0], api[1])) diff --git a/userbenchmark/api-coverage/run.py b/userbenchmark/api-coverage/run.py new file mode 100644 index 0000000000..ded56f6658 --- /dev/null +++ b/userbenchmark/api-coverage/run.py @@ -0,0 +1,214 @@ +import itertools +import time +from datetime import datetime +from typing import List +import json +import numpy as np +import argparse +import re +import torch + +from ..utils import REPO_PATH, add_path, get_output_dir, get_output_json, dump_output + +with add_path(REPO_PATH): + from torchbenchmark.util.experiment.instantiator import list_models, load_model, TorchBenchModelConfig + from torchbenchmark.util.experiment.metrics import TorchBenchModelMetrics, get_model_test_metrics + import torchbenchmark.util.experiment.metrics + +BM_NAME = "api-coverage" + + +def parse_func(func): + if hasattr(func, '__module__'): + module_name = func.__module__ + func_name = func.__name__ + else: + if hasattr(func, '__qualname__'): + func_name = func.__qualname__ + module_name = '' + else: + if type(func) == torch._C.Generator: + func_name = 'torch._C.Generator' + module_name = '' + else: + raise RuntimeError("no matched module and func name: ", func, type(func)) + return module_name, func_name + + +def generate_API_list(): + tmp_api_list = set() + raw_all_apis = set(torch.overrides.get_testing_overrides().keys()) + # collect all items' attribute `module` to a list + for item in raw_all_apis: + module_name, func_name = parse_func(item) + # if (module_name, func_name) in api_list: + # print("duplicated: ", (module_name, func_name)) + tmp_api_list.add((module_name, func_name)) + ignored_funcs = set([_ for _ in torch.overrides.get_ignored_functions() if _ not in [True, False]]) + tmp_ignored_api_list = set() + for item in ignored_funcs: + module_name, func_name = parse_func(item) + tmp_ignored_api_list.add((module_name, func_name)) + return tmp_api_list, tmp_ignored_api_list + +API_LIST, IGNORED_API_LIST = generate_API_list() + + +class CoverageMode(torch.overrides.TorchFunctionMode): + + def __init__(self, model='', output_file=None): + self.model = model + self.seen = set() + self.api_used = set() + self.output_file = output_file + self.api_need_support = set() + + def check_func_in_APIs(self, func): + module_name, func_name = parse_func(func) + if (module_name, func_name) not in API_LIST: + if (module_name, func_name) not in IGNORED_API_LIST and module_name != 'torch._ops.profiler': + new_pair = (module_name, func_name) + if new_pair not in self.api_need_support: + # debugging purpose + # print("not in API_LIST or IGNORED_API_LIST: (%s, %s)" % (module_name, 
func_name)) + self.api_need_support.add((module_name, func_name)) + else: + self.api_used.add((module_name, func_name)) + # debug + # print("in APIs: ", (module_name, func_name)) + + def get_api_coverage_rate(self): + return len(self.api_used) / len(API_LIST) + + def __torch_function__(self, func, types, args=(), kwargs=None): + self.seen.add(func) + if kwargs is None: + kwargs = {} + self.check_func_in_APIs(func) + return func(*args, **kwargs) + + def commit(self): + if self.output_file: + with open(self.output_file, 'a') as f: + for api in self.api_used: + f.write("%s,%s\n" % (api[0], api[1])) + + def update_api_used(self, output: set): + for api in self.api_used: + output.add(api) + + def update_need_support(self, output: set): + for api in self.api_need_support: + output.add(api) + + +def generate_model_config(model_name: str) -> List[TorchBenchModelConfig]: + devices = ["cpu", "cuda"] + tests = ["train", "eval"] + cfgs = itertools.product(*[devices, tests]) + result = [TorchBenchModelConfig( + name=model_name, + device=device, + test=test, + batch_size=None, + jit=False, + extra_args=[], + extra_env=None, + ) for device, test in cfgs] + return result + + +def parse_args(args: List[str]): + parser = argparse.ArgumentParser() + parser.add_argument("-m", "--models", default="", + help="Specify the models to run, default (empty) runs all models.") + parser.add_argument("-d", "--device", default="cuda", help="Specify the device.") + parser.add_argument("-t", "--test", default="eval,train", help="Specify the test.") + parser.add_argument("-o", "--output", type=str, help="The default output json file.") + args = parser.parse_args(args) + return args + + +def generate_filter(args: argparse.Namespace): + allowed_models = args.models + if allowed_models: + allowed_models = allowed_models.split(",") if "," in allowed_models else [allowed_models] + allowed_devices = args.device + allowed_devices = allowed_devices.split(",") if "," in allowed_devices else [allowed_devices] + allowed_tests = args.test + allowed_tests = allowed_tests.split(",") if "," in allowed_tests else [allowed_tests] + + def cfg_filter(cfg: TorchBenchModelConfig) -> bool: + if cfg.device in allowed_devices and cfg.test in allowed_tests: + if not allowed_models: + return True + else: + return cfg.name in allowed_models + return False + return cfg_filter + + +def run(args: List[str]): + args = parse_args(args) + output_dir = get_output_dir(BM_NAME) + models = list_models() + cfgs = list(itertools.chain(*map(generate_model_config, models))) + cfg_filter = generate_filter(args) + torchbenchmark.util.experiment.metrics.BENCHMARK_ITERS = 1 + torchbenchmark.util.experiment.metrics.WARMUP_ROUNDS = 0 + single_round_result = [] + api_used = set() + api_need_support = set() + for cfg in filter(cfg_filter, cfgs): + try: + # load the model instance within the same process + model = load_model(cfg) + # get the model test metrics + with CoverageMode('', '') as coverage: + try: + get_model_test_metrics(model, metrics=["latencies"]) + finally: + coverage.update_api_used(api_used) + coverage.update_need_support(api_need_support) + except NotImplementedError: + # some models don't implement the test specified + single_round_result.append({ + 'cfg': cfg.__dict__, + 'raw_metrics': "NotImplemented", + }) + except RuntimeError as e: + single_round_result.append({ + 'cfg': cfg.__dict__, + 'raw_metrics': f"RuntimeError: {e}", + }) + + # reduce full results to metrics + # log detailed results in the .userbenchmark/model-stableness/logs/ directory + 
+    log_dir = output_dir.joinpath("logs")
+    log_dir.mkdir(exist_ok=True, parents=True)
+    fname = "logs-{}.json".format(datetime.fromtimestamp(time.time()).strftime("%Y%m%d%H%M%S"))
+    full_fname = log_dir.joinpath(fname)
+    with open(full_fname, 'w') as f:
+        json.dump(single_round_result, f, indent=4)
+    # log the api coverage
+    api_coverage_fname = log_dir.joinpath("%s-api_coverage.csv" % fname)
+    missed_apis = API_LIST - api_used
+    with open(api_coverage_fname, 'w') as f:
+        f.write("API coverage rate: %d/%d = %.2f%%\n" %
+                (len(api_used), len(API_LIST), len(api_used) / len(API_LIST) * 100))
+        f.write("=====Used APIs=====\n")
+        f.write("module_name,func_name\n")
+        for api in api_used:
+            f.write("%s,%s\n" % (api[0], api[1]))
+        f.write("=====Missed APIs=====\n")
+        f.write("module_name,func_name\n")
+        for api in missed_apis:
+            f.write("%s,%s\n" % (api[0], api[1]))
+    if api_need_support:
+        api_need_support_fname = log_dir.joinpath("%s-api_need_support.csv" % fname)
+        with open(api_need_support_fname, 'w') as f:
+            f.write("APIs called but not in API_LIST and IGNORED_API_LIST\n")
+            f.write("module_name,func_name\n")
+            for api in api_need_support:
+                f.write("%s,%s\n" % (api[0], api[1]))
+    print("The detailed results are saved in %s" % api_coverage_fname)
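
Note on the core mechanism used throughout this series: CoverageMode subclasses torch.overrides.TorchFunctionMode, so every torch-level call made inside the `with` block is routed through __torch_function__ before being dispatched, which is what lets the benchmark record which public APIs a model exercises. The snippet below is a minimal standalone sketch of that idea, not part of the patches; it assumes a PyTorch build where torch.overrides.TorchFunctionMode is available, simplifies the API list to function names only, and uses an illustrative toy model.

import torch

# Simplified API universe: names of functions that support __torch_function__ overrides.
API_LIST = {f.__name__ for f in torch.overrides.get_testing_overrides().keys()
            if hasattr(f, "__name__")}

class ToyCoverageMode(torch.overrides.TorchFunctionMode):
    def __init__(self):
        self.seen = set()

    def __torch_function__(self, func, types, args=(), kwargs=None):
        kwargs = kwargs or {}
        # Record the intercepted callable, then dispatch it unchanged.
        if hasattr(func, "__name__"):
            self.seen.add(func.__name__)
        return func(*args, **kwargs)

model = torch.nn.Linear(4, 2)
with ToyCoverageMode() as cov:
    model(torch.randn(8, 4))
print("APIs hit: %d of %d" % (len(cov.seen & API_LIST), len(API_LIST)))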
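
Assuming the standard torchbenchmark userbenchmark entry point (run_benchmark.py at the repository root; adjust if your checkout differs), the benchmark added by this series would be invoked roughly as follows, where the -m/-d/-t flags map to the argparse options defined in userbenchmark/api-coverage/run.py and omitting -m runs all models:

    python run_benchmark.py api-coverage -m BERT_pytorch -d cuda -t eval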