From 60ed63b341e8a4341cfea28aae5ec38acb8e0f73 Mon Sep 17 00:00:00 2001 From: Bert Maher Date: Mon, 29 Apr 2024 06:31:29 -0700 Subject: [PATCH] Remove benchmark of inductor internal launch latency Summary: This launch path apparently changes a lot (see D56642231, https://github.com/pytorch/pytorch/pull/124592). A regression will show up in other benchmarks, so let's just remove this one to save the maintenance hassle. Reviewed By: masnesral Differential Revision: D56671375 fbshipit-source-id: 509a39544f7750e0197da0c1cbaec307cba9cd75 --- .../launch_latency/async_compilation.py | 51 ------------------- .../operators/launch_latency/operator.py | 18 ------- 2 files changed, 69 deletions(-) delete mode 100644 torchbenchmark/operators/launch_latency/async_compilation.py diff --git a/torchbenchmark/operators/launch_latency/async_compilation.py b/torchbenchmark/operators/launch_latency/async_compilation.py deleted file mode 100644 index cbff0060e4..0000000000 --- a/torchbenchmark/operators/launch_latency/async_compilation.py +++ /dev/null @@ -1,51 +0,0 @@ -from torch._inductor.codecache import AsyncCompile - - -async_compile = AsyncCompile() - -inductor_nop = async_compile.triton( - "inductor_nop", - """ -import triton -import triton.language as tl -from triton.compiler.compiler import AttrsDescriptor - -try: - from torch._inductor.runtime import triton_heuristics -except ImportError: - from torch._inductor import triton_heuristics - -@triton_heuristics.pointwise( - size_hints=[1], - triton_meta={'signature': {0: 'i32'}, 'device': 0, 'device_type': 'cuda', 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(), equal_to_1=())]}, -) -@triton.jit -def inductor_nop(x): - pass -""", - device_str="cuda", -) - - -inductor_nop_args = async_compile.triton( - "inductor_nop_args", - """ -import triton -import triton.language as tl -from triton.compiler.compiler import AttrsDescriptor - -try: - from torch._inductor.runtime import triton_heuristics -except ImportError: - from torch._inductor import triton_heuristics - -@triton_heuristics.pointwise( - size_hints=[1], - triton_meta={'signature': {0: '*fp32', 1: '*fp32', 2: '*fp32', 3: '*fp32', 4: '*fp32', 5: 'i32', 6: 'i32', 7: 'i32', 8: 'i32', 9: 'i32', 10: 'i32', 11: 'i32', 12: 'i32', 13: 'i32'}, 'device': 0, 'device_type': 'cuda', 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4), equal_to_1=(5, 6, 7, 8, 9, 10, 11, 12, 13))]}, -) -@triton.jit -def inductor_nop_args(t1, t2, t3, t4, t5, i1, i2, i3, i4, i5, i6, i7, i8, i9): - pass -""", - device_str="cuda", -) diff --git a/torchbenchmark/operators/launch_latency/operator.py b/torchbenchmark/operators/launch_latency/operator.py index 8b8754a288..48ccc92dd5 100644 --- a/torchbenchmark/operators/launch_latency/operator.py +++ b/torchbenchmark/operators/launch_latency/operator.py @@ -8,16 +8,8 @@ register_benchmark, register_metric, ) - -from .async_compilation import inductor_nop, inductor_nop_args from .kernels import nop_kernel, nop_with_args_kernel, trivial_add_kernel -try: - from torch._inductor.runtime import triton_heuristics -except ImportError: - # TODO(jansel): delete this case once D56408511 lands - from torch._inductor import triton_heuristics - class Operator(BenchmarkOperator): DEFAULT_METRICS = ["walltime"] @@ -59,16 +51,6 @@ def nop_triton_compiled_kernel_run(self, *args): 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, function, None, None, metadata, *args ) - @register_benchmark() - def nop_inductor_kernel_run(self, *args): - stream = get_raw_stream(0) - grid = triton_heuristics.grid(1) - - if len(args) == 0: - return lambda: inductor_nop.run(1, grid=grid, stream=stream) - args = args[:-5] - return lambda: inductor_nop_args.run(*args, grid=grid, stream=stream) - @register_benchmark() - def nop_inductor_kernel(self, *args): - return lambda: trivial_add_kernel(*args)