From 25a94efcabb16b6d56f05677eae939e98b078cf7 Mon Sep 17 00:00:00 2001
From: winskuo-quic <143469905+winskuo-quic@users.noreply.github.com>
Date: Thu, 9 Jan 2025 16:45:58 +0800
Subject: [PATCH] Qualcomm AI Engine Direct - Add unit test for Spill-Fill
 buffer (#7518)

Add unit test to validate the size of the Spill-Fill buffer.
---
 backends/qualcomm/tests/models.py            | 12 +++++++
 backends/qualcomm/tests/test_qnn_delegate.py | 37 ++++++++++++++++++++
 backends/qualcomm/utils/utils.py             |  8 +++--
 3 files changed, 54 insertions(+), 3 deletions(-)

diff --git a/backends/qualcomm/tests/models.py b/backends/qualcomm/tests/models.py
index 3faa1dfbe9..96aab87826 100644
--- a/backends/qualcomm/tests/models.py
+++ b/backends/qualcomm/tests/models.py
@@ -596,6 +596,18 @@ def forward(self, input_pos, k_val):
         return k_out
 
 
+class LargeTensorLinear(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+        hidden_dim = 4096
+        self.linear1 = torch.nn.Linear(512, hidden_dim)
+        self.linear2 = torch.nn.Linear(hidden_dim, 512)
+
+    def forward(self, x):
+        x1 = self.linear1(x) + self.linear1(x)
+        return self.linear2(x1)
+
+
 class LayerNorm(torch.nn.Module):
     def __init__(self):
         super().__init__()
diff --git a/backends/qualcomm/tests/test_qnn_delegate.py b/backends/qualcomm/tests/test_qnn_delegate.py
index 43b78d341c..73ca1820f3 100644
--- a/backends/qualcomm/tests/test_qnn_delegate.py
+++ b/backends/qualcomm/tests/test_qnn_delegate.py
@@ -1581,6 +1581,24 @@ def test_qnn_backend_skip_node_op(self):
             skip_node_op_set={"aten.add.Tensor"},
         )
 
+    def test_qnn_backend_spill_fill_buffer_size(self):
+        module = LargeTensorLinear()  # noqa: F405
+        sample_input = (torch.randn(1, 256, 512),)
+        edge_prog = capture_program(module, sample_input)
+
+        backend_options = generate_htp_compiler_spec(
+            use_fp16=True,
+            use_multi_contexts=True,
+        )
+        compiler_specs = generate_qnn_executorch_compiler_spec(
+            soc_model=self.chipset_table[TestQNN.model],
+            backend_options=backend_options,
+        )
+        partitioner = QnnPartitioner(compiler_specs)
+        edge_prog.exported_program = to_backend(edge_prog.exported_program, partitioner)
+        max_sf_size = update_spill_fill_size(edge_prog.exported_program)
+        self.assertNotEqual(0, max_sf_size)
+
     def test_qnn_backend_multi_contexts(self):
         module = SimpleModel()  # noqa: F405
         sample_input = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28))
@@ -2011,6 +2029,25 @@ def calibrator(gm):
         ).to_executorch()
         self.verify_output(module, sample_input, exec_prog)
 
+    def test_qnn_backend_spill_fill_buffer_size(self):
+        module = LargeTensorLinear()  # noqa: F405
+        sample_input = (torch.randn(1, 256, 512),)
+        module = self.get_qdq_module(module, sample_input)
+        edge_prog = capture_program(module, sample_input)
+
+        backend_options = generate_htp_compiler_spec(
+            use_fp16=False,
+            use_multi_contexts=True,
+        )
+        compiler_specs = generate_qnn_executorch_compiler_spec(
+            soc_model=self.chipset_table[TestQNN.model],
+            backend_options=backend_options,
+        )
+        partitioner = QnnPartitioner(compiler_specs)
+        edge_prog.exported_program = to_backend(edge_prog.exported_program, partitioner)
+        max_sf_size = update_spill_fill_size(edge_prog.exported_program)
+        self.assertNotEqual(0, max_sf_size)
+
     def test_qnn_backend_graph_level_mixed_precision(self):
         module = SimpleModel()  # noqa: F405
         sample_input = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28))
diff --git a/backends/qualcomm/utils/utils.py b/backends/qualcomm/utils/utils.py
index 3d2a9f8c85..e13705b3a8 100644
--- a/backends/qualcomm/utils/utils.py
+++ b/backends/qualcomm/utils/utils.py
@@ -269,15 +269,17 @@ def set_spec(module, options):
             options.backend_options.htp_options.max_sf_buf_size = max_sf_buf_size
             set_spec(module, options)
 
+    max_sf_size, modules_map = 0, {}
     if isinstance(exported_program, list):
-        max_sf_size, modules_map = 0, {}
         for prog in exported_program:
             max_sf_buf_size, module_map = get_program_info(prog)
             max_sf_size = max(max_sf_size, max_sf_buf_size)
             modules_map.update(module_map)
-        update_program(max_sf_size, modules_map)
     else:
-        update_program(*get_program_info(exported_program))
+        max_sf_size, modules_map = get_program_info(exported_program)
+    update_program(max_sf_size, modules_map)
+
+    return max_sf_size
 
 
 def get_decomp_table() -> Dict[torch._ops.OperatorBase, Callable]:
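
Note for reviewers (not part of the patch): below is a minimal sketch of how
the new return value of update_spill_fill_size() can be consumed outside the
unittest harness. It reuses only APIs exercised by the patch itself; the
TwoLinear class name, the QcomChipset import path, and the SM8650 target are
illustrative assumptions and may differ in your checkout. Requires an
ExecuTorch build with the QNN backend.

    import torch

    from executorch.backends.qualcomm.partition.qnn_partitioner import QnnPartitioner
    # Assumption: QcomChipset's module path has moved between releases; adjust as needed.
    from executorch.backends.qualcomm.serialization.qc_schema import QcomChipset
    from executorch.backends.qualcomm.utils.utils import (
        capture_program,
        generate_htp_compiler_spec,
        generate_qnn_executorch_compiler_spec,
        update_spill_fill_size,
    )
    from executorch.exir.backend.backend_api import to_backend


    class TwoLinear(torch.nn.Module):
        # Same shapes as LargeTensorLinear above: large intermediate
        # activations encourage the HTP to allocate a spill-fill buffer.
        def __init__(self):
            super().__init__()
            self.linear1 = torch.nn.Linear(512, 4096)
            self.linear2 = torch.nn.Linear(4096, 512)

        def forward(self, x):
            return self.linear2(self.linear1(x) + self.linear1(x))


    module = TwoLinear()
    sample_input = (torch.randn(1, 256, 512),)
    edge_prog = capture_program(module, sample_input)

    # Spill-fill sharing only applies when the graph is split into multiple
    # QNN contexts, hence use_multi_contexts=True.
    backend_options = generate_htp_compiler_spec(use_fp16=True, use_multi_contexts=True)
    compiler_specs = generate_qnn_executorch_compiler_spec(
        soc_model=QcomChipset.SM8650,  # illustrative target
        backend_options=backend_options,
    )
    edge_prog.exported_program = to_backend(
        edge_prog.exported_program, QnnPartitioner(compiler_specs)
    )

    # New in this patch: the propagated maximum spill-fill buffer size is
    # returned, so callers can log it or assert on it.
    max_sf_size = update_spill_fill_size(edge_prog.exported_program)
    print(f"spill-fill buffer size: {max_sf_size} bytes")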