[QNN EP] Fix test zero-point calculation and flaky MatMul test (microsoft#17338)

### Description
- Fix an incorrect zero-point calculation in the unit tests; it affects int8 (signed) QDQ models (see the sketch below).
- Replace the flaky MatMul test, which occasionally fails on the main branch, with a version that uses explicit input data.

### Motivation and Context
Fixes a test bug and improves test accuracy and stability.
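
For context, here is a minimal, self-contained sketch of the corrected test-side quantization parameters, mirroring the `QuantParams<QType>::Compute` change in `qnn_test_utils.h` below. `std::nearbyint` (default ties-to-even rounding) stands in for ORT's `RoundHalfToEven` helper, and the `main()` driver is illustrative only.

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <limits>

template <typename QType>
struct QuantParams {
  float scale;
  QType zero_point;

  static QuantParams Compute(float rmin, float rmax) {
    rmax = std::max(rmax, rmin + 0.0001f);  // QNN requires a minimum range of 0.0001
    rmin = std::min(rmin, 0.0f);            // QNN and ORT require the range to include 0.0f
    rmax = std::max(rmax, 0.0f);

    constexpr float qmin = static_cast<float>(std::numeric_limits<QType>::min());
    constexpr float qmax = static_cast<float>(std::numeric_limits<QType>::max());

    const float scale = (rmax == rmin) ? 1.0f : (rmax - rmin) / (qmax - qmin);
    // Correct affine zero-point: qmin - rmin / scale. The old test code computed
    // (qmin - rmin) / scale, which coincides with this only when qmin is 0 (unsigned types)
    // or scale is 1, which is why only int8 (signed) QDQ models were affected.
    const float initial_zero_point = qmin - (rmin / scale);
    const QType zero_point =
        static_cast<QType>(std::nearbyint(std::max(qmin, std::min(qmax, initial_zero_point))));
    return QuantParams{scale, zero_point};
  }
};

int main() {
  // Worked example for the test range [-10.0f, 10.0f] with int8:
  //   scale = 20 / 255 ~= 0.078431, zero_point = round(-128 + 127.5) = round(-0.5) = 0.
  // The old formula gave round((-128 + 10) / 0.078431) ~= -1505, far outside the int8 range.
  auto qp = QuantParams<int8_t>::Compute(-10.0f, 10.0f);
  std::printf("scale=%f zero_point=%d\n", qp.scale, static_cast<int>(qp.zero_point));
  return 0;
}
```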
adrianlizarraga authored Aug 30, 2023
1 parent 922629a commit 21ae86e
Showing 4 changed files with 90 additions and 28 deletions.
20 changes: 15 additions & 5 deletions onnxruntime/test/providers/qnn/matmul_test.cpp
@@ -57,7 +57,8 @@ static GetTestQDQModelFn<QuantType> BuildMatMulOpQDQTestCase(const TestInputDef<
static void RunMatMulOpOpTest(const TestInputDef<float>& input1_def,
const TestInputDef<float>& input2_def,
ExpectedEPNodeAssignment expected_ep_assignment,
int opset = 13) {
int opset = 13,
float f32_abs_err = 1e-4f) {
ProviderOptions provider_options;
#if defined(_WIN32)
provider_options["backend_path"] = "QnnCpu.dll";
@@ -69,7 +70,7 @@ static void RunMatMulOpOpTest(const TestInputDef<float>& input1_def,
provider_options,
opset,
expected_ep_assignment,
2e-4f);
f32_abs_err);
}

// Runs a QDQ MatMul model on the QNN HTP backend. Checks the graph node assignment, and that the
@@ -105,10 +106,19 @@ TEST_F(QnnCPUBackendTests, MatMulOp) {
}

// Test MatMul broadcasting
// Note slight inaccuracy in CPU backend:
// Expected: contains 896 values, where each value and its corresponding value in 16-byte object
// <80-03 00-00 00-00 00-00 40-00 34-F0 5B-01 00-00> are an almost-equal pair
// Actual: 16-byte object <80-03 00-00 00-00 00-00 40-00 23-F0 5B-01 00-00>,
// where the value pair (148.536011, 148.536255) at index #4 don't match, which is 0.000244141 from 148.536
TEST_F(QnnCPUBackendTests, MatMulOp_Broadcast) {
RunMatMulOpOpTest(TestInputDef<float>({28, 1, 64}, false, -10.0f, 10.0f),
TestInputDef<float>({64, 32}, false, -10.0f, 10.0f),
ExpectedEPNodeAssignment::All, 18);
// Create two matrices with element values in the range [-10.0, 10.0].
std::vector<float> input_a = GetFloatDataInRange(-10.0f, 10.0f, 28 * 64);
std::vector<float> input_b = GetFloatDataInRange(-10.0f, 10.0f, 64 * 32);

RunMatMulOpOpTest(TestInputDef<float>({28, 1, 64}, false, input_a),
TestInputDef<float>({64, 32}, false, input_b),
ExpectedEPNodeAssignment::All, 18, 0.00026f);
}

#if defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
24 changes: 24 additions & 0 deletions onnxruntime/test/providers/qnn/qnn_test_utils.cc
@@ -4,6 +4,7 @@
#if !defined(ORT_MINIMAL_BUILD)

#include "test/providers/qnn/qnn_test_utils.h"
#include <cassert>
#include "test/util/include/asserts.h"
#include "test/util/include/default_providers.h"
#include "test/util/include/test/test_environment.h"
@@ -15,6 +16,29 @@
namespace onnxruntime {
namespace test {

std::vector<float> GetFloatDataInRange(float min_val, float max_val, size_t num_elems) {
if (num_elems == 0) {
return {};
}

std::vector<float> data;
data.reserve(num_elems);

const float step_size = (max_val - min_val) / static_cast<float>(num_elems);
float val = min_val;
for (size_t i = 0; i < num_elems; i++) {
data.push_back(val);
val += step_size;
}

// Try to ensure that 0.0 and max_val are also included in the array.
// If num_elems is less than 3, then not all of min_val, 0, and max_val will be present.
data[num_elems / 2] = 0.0f;
data[num_elems - 1] = max_val;

return data;
}

void RunQnnModelTest(const GetTestModelFn& build_test_case, const ProviderOptions& provider_options,
int opset_version, ExpectedEPNodeAssignment expected_ep_assignment,
float fp32_abs_err, logging::Severity log_severity) {
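As an illustration of the new helper, here is a minimal usage sketch (assuming the `GetFloatDataInRange` definition above is available via `qnn_test_utils.h`; the assertions are for exposition only):

```cpp
#include <cassert>
#include <vector>

#include "test/providers/qnn/qnn_test_utils.h"

// GetFloatDataInRange(-10.0f, 10.0f, 5) builds a deterministic ramp with step
// (10 - (-10)) / 5 = 4: {-10, -6, -2, 2, 6}. The middle element is then pinned to 0.0f
// and the last element to max_val, yielding {-10, -6, 0, 2, 10}.
void CheckFloatRamp() {
  std::vector<float> data = onnxruntime::test::GetFloatDataInRange(-10.0f, 10.0f, 5);
  assert(data.size() == 5);
  assert(data[0] == -10.0f);  // min_val
  assert(data[1] == -6.0f);   // linear step
  assert(data[2] == 0.0f);    // middle element pinned to zero
  assert(data[4] == 10.0f);   // last element pinned to max_val
}
```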
39 changes: 24 additions & 15 deletions onnxruntime/test/providers/qnn/qnn_test_utils.h
@@ -8,6 +8,7 @@
#include <cmath>
#include <unordered_map>
#include "core/framework/provider_options.h"
#include "core/util/qmath.h"

#include "test/optimizer/qdq_test_utils.h"
#include "test/util/include/test_utils.h"
@@ -30,23 +31,19 @@ struct QuantParams {
QType zero_point;

static QuantParams<QType> Compute(float rmin, float rmax) {
if (rmin == 0.0f && rmax == 0.0f) { // Quantizing a single zero.
return QuantParams<QType>{1.0f, 0};
}
// Ensure a minimum range of 0.0001 (required by QNN)
rmax = std::max(rmax, rmin + 0.0001f);

if (rmin == rmax) { // One data-point (x) to quantize.
if (rmin < 0) { // new range is [-x , 0.0f]
rmax = 0.0f;
} else { // new range is [0.0f, x]
rmin = 0.0f;
}
}
// Both QNN and ORT require the range to include 0.0f
rmin = std::min(rmin, 0.0f);
rmax = std::max(rmax, 0.0f);

constexpr float qmin = static_cast<float>(std::numeric_limits<QType>::min());
constexpr float qmax = static_cast<float>(std::numeric_limits<QType>::max());

const float scale = (rmax - rmin) / (qmax - qmin);
const QType zero_point = static_cast<QType>(std::roundf((qmin - rmin) / scale));
const float scale = rmax == rmin ? 1.0f : (rmax - rmin) / (qmax - qmin);
const float initial_zero_point = qmin - (rmin / scale);
const QType zero_point = static_cast<QType>(RoundHalfToEven(std::max(qmin, std::min(qmax, initial_zero_point))));

return QuantParams<QType>{scale, zero_point};
}
@@ -75,6 +72,18 @@ inline QuantParams<QType> GetDataQuantParams(gsl::span<const float> data) {
return QuantParams<QType>::Compute(min_val, max_val);
}

/**
* Returns a float vector with data in the specified range. Uses linear interpolation to fill the elements in the array
* and ensures that min_val, 0.0f, and max_val are all included.
* TODO(adrianlizarraga): Should use this instead of random *float* test inputs for test repeatability/stability!
*
* \param min_val The minimum value.
* \param max_val The maximum value.
* \param num_elems The number of elements in the result. Should be at least 3 to include min, 0, and max.
* \return A vector of floats with elements set to values in the specified range.
*/
std::vector<float> GetFloatDataInRange(float min_val, float max_val, size_t num_elems);

// Class that defines an input that can be created with ModelTestBuilder.
// Defines whether the input is an initializer and if the data should be randomized or if
// set to an explicit value.
@@ -89,7 +98,7 @@ struct TestInputDef {
T max;
};

TestInputDef() : is_initializer_(false) {}
TestInputDef() = default;

// Creates a random input definition. Specify its shape, whether it's an initializer, and
// the min/max range.
@@ -185,8 +194,8 @@ struct TestInputDef {
private:
std::vector<int64_t> shape_;
std::variant<RawData, RandomData> data_info_;
bool is_initializer_;
bool has_range_override_;
bool is_initializer_{false};
bool has_range_override_{false};
std::pair<T, T> range_override_;
};

35 changes: 27 additions & 8 deletions onnxruntime/test/providers/qnn/reduce_op_test.cc
@@ -357,14 +357,16 @@ GetTestQDQModelFn<QuantType> BuildQDQReduceOpTestCase(const std::string& reduce_
* \param keepdims Common attribute for all reduce operations.
* \param opset The opset version. Some opset versions have "axes" as an attribute or input.
* \param expected_ep_assignment How many nodes are expected to be assigned to QNN (All, Some, or None)
* \param fp32_abs_err Error tolerance.
*/
template <typename QuantType>
static void RunReduceOpQDQTest(const std::string& op_type,
const TestInputDef<float>& input_def,
const std::vector<int64_t>& axes,
bool keepdims,
int opset,
ExpectedEPNodeAssignment expected_ep_assignment) {
ExpectedEPNodeAssignment expected_ep_assignment,
float fp32_abs_err = 1e-5f) {
ProviderOptions provider_options;
#if defined(_WIN32)
provider_options["backend_path"] = "QnnHtp.dll";
@@ -382,7 +384,7 @@ static void RunReduceOpQDQTest(const std::string& op_type,
provider_options,
opset,
expected_ep_assignment,
1e-5f);
fp32_abs_err);
}

//
@@ -441,8 +443,10 @@ TEST_F(QnnHTPBackendTests, ReduceSumU8Opset11) {
// - Uses int8 as the quantization type.
// - Uses opset 13, which has "axes" as an input.
TEST_F(QnnHTPBackendTests, ReduceSumS8Opset13) {
std::vector<float> input_data = GetFloatDataInRange(-10.0f, 10.0f, 9);

RunReduceOpQDQTest<int8_t>("ReduceSum",
TestInputDef<float>({2, 2}, false, -10.0f, 10.0f),
TestInputDef<float>({3, 3}, false, input_data),
{0, 1}, // axes
true, // keepdims
13, // opset
@@ -451,8 +455,10 @@ TEST_F(QnnHTPBackendTests, ReduceSumS8Opset13) {

// Tests that keepdims = false generates expected results.
TEST_F(QnnHTPBackendTests, ReduceSumS8Opset13_NoKeepDims) {
std::vector<float> input_data = GetFloatDataInRange(-10.0f, 10.0f, 9);

RunReduceOpQDQTest<int8_t>("ReduceSum",
TestInputDef<float>({2, 2}, false, -10.0f, 10.0f),
TestInputDef<float>({3, 3}, false, input_data),
{1}, // axes
false, // keepdims
13, // opset
@@ -507,8 +513,10 @@ TEST_F(QnnHTPBackendTests, ReduceMaxU8Opset13) {
// - Uses int8 as the quantization type.
// - Uses opset 18, which has "axes" as an input.
TEST_F(QnnHTPBackendTests, ReduceMaxS8Opset18) {
std::vector<float> input_data = GetFloatDataInRange(-10.0f, 10.0f, 9);

RunReduceOpQDQTest<int8_t>("ReduceMax",
TestInputDef<float>({2, 2}, false, -10.0f, 10.0f),
TestInputDef<float>({3, 3}, false, input_data),
{0, 1}, // axes
true, // keepdims
18, // opset
@@ -552,8 +560,10 @@ TEST_F(QnnHTPBackendTests, ReduceMinU8Opset13) {
//
// Uses int8 as the quantization type.
TEST_F(QnnHTPBackendTests, ReduceMinS8Opset18) {
std::vector<float> input_data = GetFloatDataInRange(-10.0f, 10.0f, 9);

RunReduceOpQDQTest<int8_t>("ReduceMin",
TestInputDef<float>({2, 2}, false, -10.0f, 10.0f),
TestInputDef<float>({3, 3}, false, input_data),
{0, 1}, // axes
true, // keepdims
18, // opset
@@ -616,13 +626,8 @@ TEST_F(QnnHTPBackendTests, ReduceMeanU8Opset13) {
//
// - Uses int8 as the quantization type.
// - Uses opset 18, which has "axes" as an input.
//
// TODO(adrianlizarraga): Inaccuracy detected for output 'output', element 0.
// Output quant params: scale=0.0007829521200619638, zero_point=127.
// Expected val: -0.19965279102325439
// QNN QDQ val: -0.19730393588542938 (err 0.0023488551378250122)
// CPU QDQ val: -0.19965279102325439 (err 0)
TEST_F(QnnHTPBackendTests, ReduceMeanS8Opset18) {
std::vector<float> input_data = GetFloatDataInRange(-10.0f, 10.0f, 48);

RunReduceOpQDQTest<int8_t>("ReduceMean",
TestInputDef<float>({1, 3, 4, 4}, false, -10.0f, 10.0f),
TestInputDef<float>({1, 3, 4, 4}, false, input_data),
{0, 1, 2, 3}, // axes
true, // keepdims
18, // opset
ExpectedEPNodeAssignment::All);
ExpectedEPNodeAssignment::All,
0.0016f); // TODO: Remove additional tolerance needed for inaccuracy
}

#endif // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
