From e375572037683cecf55a02dce9001e3643ec3ed4 Mon Sep 17 00:00:00 2001
From: Yvan Mokwinski <yvan.mokwinski@amd.com>
Date: Wed, 24 Feb 2021 18:10:28 -0700
Subject: [PATCH] implicit tolerance multiplier proposal (#192)

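* implicit tolerance multiplier

  near_check_general_template (generic, float-complex and double-complex
  variants) now accepts a CPU/GPU difference of up to MAX_TOL_MULTIPLIER (4)
  times the comparison tolerance instead of failing at 1x. When a multiplier
  greater than 1 was needed, a WARNING reporting the largest multiplier used
  is printed to std::cerr. The explicit `tolm: [2]` entries are removed from
  test_csrmv.yaml and test_gebsrmv.yaml.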
---
 clients/common/rocsparse_check.cpp | 154 ++++++++++++++++++++++++-----
 clients/tests/test_csrmv.yaml      |   1 -
 clients/tests/test_gebsrmv.yaml    |   1 -
 3 files changed, 129 insertions(+), 27 deletions(-)
diff --git a/clients/common/rocsparse_check.cpp b/clients/common/rocsparse_check.cpp
index 8181f1c1..08acb40c 100644
--- a/clients/common/rocsparse_check.cpp
+++ b/clients/common/rocsparse_check.cpp
@@ -135,6 +135,8 @@ void unit_check_general(int64_t M, int64_t N, int64_t lda, const size_t* hCPU, c
     UNIT_CHECK(M, N, lda, hCPU, hGPU, ASSERT_EQ);
 }
 
+#define MAX_TOL_MULTIPLIER 4
+
 template <typename T>
 void near_check_general_template(rocsparse_int      M,
                                  rocsparse_int      N,
@@ -143,6 +145,7 @@ void near_check_general_template(rocsparse_int      M,
                                  const T*           hGPU,
                                  floating_data_t<T> tol = default_tolerance<T>::value)
 {
+    int tolm = 1;
     for(rocsparse_int j = 0; j < N; ++j)
     {
         for(rocsparse_int i = 0; i < M; ++i)
@@ -160,20 +163,51 @@ void near_check_general_template(rocsparse_int      M,
             }
             else
             {
-                ASSERT_NEAR(hCPU[i + j * lda], hGPU[i + j * lda], compare_val);
+                int k;
+                for(k = 1; k <= MAX_TOL_MULTIPLIER; ++k)
+                {
+                    if(std::abs(hCPU[i + j * lda] - hGPU[i + j * lda]) <= compare_val * k)
+                    {
+                        break;
+                    }
+                }
+
+                if(k > MAX_TOL_MULTIPLIER)
+                {
+                    ASSERT_NEAR(hCPU[i + j * lda], hGPU[i + j * lda], compare_val);
+                }
+                tolm = std::max(tolm, k);
             }
 #else
-            if(std::abs(hCPU[i + j * lda] - hGPU[i + j * lda]) >= compare_val)
+
+            int k;
+            for(k = 1; k <= MAX_TOL_MULTIPLIER; ++k)
+            {
+                if(std::abs(hCPU[i + j * lda] - hGPU[i + j * lda]) <= compare_val * k)
+                {
+                    break;
+                }
+            }
+
+            if(k > MAX_TOL_MULTIPLIER)
             {
                 std::cerr.precision(12);
                 std::cerr << "ASSERT_NEAR(" << hCPU[i + j * lda] << ", " << hGPU[i + j * lda]
                           << ") failed: " << std::abs(hCPU[i + j * lda] - hGPU[i + j * lda])
-                          << " exceeds compare_val " << compare_val << std::endl;
+                          << " exceeds permissive range [" << compare_val << ","
+                          << compare_val * MAX_TOL_MULTIPLIER << " ]" << std::endl;
                 exit(EXIT_FAILURE);
             }
+            tolm = std::max(tolm, k);
 #endif
         }
     }
+
+    if(tolm > 1)
+    {
+        std::cerr << "WARNING near_check has been permissive with a tolerance multiplier equal to "
+                  << tolm << std::endl;
+    }
 }
 
 template <>
@@ -184,6 +218,7 @@ void near_check_general_template(rocsparse_int                  M,
                                  const rocsparse_float_complex* hGPU,
                                  float                          tol)
 {
+    int tolm = 1;
     for(rocsparse_int j = 0; j < N; ++j)
     {
         for(rocsparse_int i = 0; i < M; ++i)
@@ -204,28 +239,62 @@ void near_check_general_template(rocsparse_int                  M,
             }
             else
             {
-                ASSERT_NEAR(std::real(hCPU[i + j * lda]),
-                            std::real(hGPU[i + j * lda]),
-                            std::real(compare_val));
-                ASSERT_NEAR(std::imag(hCPU[i + j * lda]),
-                            std::imag(hGPU[i + j * lda]),
-                            std::imag(compare_val));
+                int k;
+                for(k = 1; k <= MAX_TOL_MULTIPLIER; ++k)
+                {
+                    if(std::abs(std::real(hCPU[i + j * lda]) - std::real(hGPU[i + j * lda]))
+                           <= std::real(compare_val) * k
+                       && std::abs(std::imag(hCPU[i + j * lda]) - std::imag(hGPU[i + j * lda]))
+                              <= std::imag(compare_val) * k)
+                    {
+                        break;
+                    }
+                }
+
+                if(k > MAX_TOL_MULTIPLIER)
+                {
+                    ASSERT_NEAR(std::real(hCPU[i + j * lda]),
+                                std::real(hGPU[i + j * lda]),
+                                std::real(compare_val));
+                    ASSERT_NEAR(std::imag(hCPU[i + j * lda]),
+                                std::imag(hGPU[i + j * lda]),
+                                std::imag(compare_val));
+                }
+                tolm = std::max(tolm, k);
             }
 #else
-            if(std::abs(std::real(hCPU[i + j * lda]) - std::real(hGPU[i + j * lda]))
-                   >= std::real(compare_val)
-               || std::abs(std::imag(hCPU[i + j * lda]) - std::imag(hGPU[i + j * lda]))
-                      >= std::imag(compare_val))
+
+            int k;
+            for(k = 1; k <= MAX_TOL_MULTIPLIER; ++k)
+            {
+                if(std::abs(std::real(hCPU[i + j * lda]) - std::real(hGPU[i + j * lda]))
+                       <= std::real(compare_val) * k
+                   && std::abs(std::imag(hCPU[i + j * lda]) - std::imag(hGPU[i + j * lda]))
+                          <= std::imag(compare_val) * k)
+                {
+                    break;
+                }
+            }
+
+            if(k > MAX_TOL_MULTIPLIER)
             {
                 std::cerr.precision(16);
                 std::cerr << "ASSERT_NEAR(" << hCPU[i + j * lda] << ", " << hGPU[i + j * lda]
                           << ") failed: " << std::abs(hCPU[i + j * lda] - hGPU[i + j * lda])
-                          << " exceeds compare_val " << compare_val << std::endl;
+                          << " exceeds permissive range [" << compare_val << ","
+                          << compare_val * MAX_TOL_MULTIPLIER << " ]" << std::endl;
                 exit(EXIT_FAILURE);
             }
+            tolm = std::max(tolm, k);
 #endif
         }
     }
+
+    if(tolm > 1)
+    {
+        std::cerr << "WARNING near_check has been permissive with a tolerance multiplier equal to "
+                  << tolm << std::endl;
+    }
 }
 
 template <>
@@ -236,6 +305,7 @@ void near_check_general_template(rocsparse_int                   M,
                                  const rocsparse_double_complex* hGPU,
                                  double                          tol)
 {
+    int tolm = 1;
     for(rocsparse_int j = 0; j < N; ++j)
     {
         for(rocsparse_int i = 0; i < M; ++i)
@@ -256,28 +326,62 @@ void near_check_general_template(rocsparse_int                   M,
             }
             else
             {
-                ASSERT_NEAR(std::real(hCPU[i + j * lda]),
-                            std::real(hGPU[i + j * lda]),
-                            std::real(compare_val));
-                ASSERT_NEAR(std::imag(hCPU[i + j * lda]),
-                            std::imag(hGPU[i + j * lda]),
-                            std::imag(compare_val));
+                int k;
+                for(k = 1; k <= MAX_TOL_MULTIPLIER; ++k)
+                {
+                    if(std::abs(std::real(hCPU[i + j * lda]) - std::real(hGPU[i + j * lda]))
+                           <= std::real(compare_val) * k
+                       && std::abs(std::imag(hCPU[i + j * lda]) - std::imag(hGPU[i + j * lda]))
+                              <= std::imag(compare_val) * k)
+                    {
+                        break;
+                    }
+                }
+
+                if(k > MAX_TOL_MULTIPLIER)
+                {
+                    ASSERT_NEAR(std::real(hCPU[i + j * lda]),
+                                std::real(hGPU[i + j * lda]),
+                                std::real(compare_val));
+                    ASSERT_NEAR(std::imag(hCPU[i + j * lda]),
+                                std::imag(hGPU[i + j * lda]),
+                                std::imag(compare_val));
+                }
+                tolm = std::max(tolm, k);
             }
 #else
-            if(std::abs(std::real(hCPU[i + j * lda]) - std::real(hGPU[i + j * lda]))
-                   >= std::real(compare_val)
-               || std::abs(std::imag(hCPU[i + j * lda]) - std::imag(hGPU[i + j * lda]))
-                      >= std::imag(compare_val))
+
+            int k;
+            for(k = 1; k <= MAX_TOL_MULTIPLIER; ++k)
+            {
+                if(std::abs(std::real(hCPU[i + j * lda]) - std::real(hGPU[i + j * lda]))
+                       <= std::real(compare_val) * k
+                   && std::abs(std::imag(hCPU[i + j * lda]) - std::imag(hGPU[i + j * lda]))
+                          <= std::imag(compare_val) * k)
+                {
+                    break;
+                }
+            }
+
+            if(k > MAX_TOL_MULTIPLIER)
             {
                 std::cerr.precision(16);
                 std::cerr << "ASSERT_NEAR(" << hCPU[i + j * lda] << ", " << hGPU[i + j * lda]
                           << ") failed: " << std::abs(hCPU[i + j * lda] - hGPU[i + j * lda])
-                          << " exceeds compare_val " << compare_val << std::endl;
+                          << " exceeds permissive range [" << compare_val << ","
+                          << compare_val * MAX_TOL_MULTIPLIER << " ]" << std::endl;
                 exit(EXIT_FAILURE);
             }
+            tolm = std::max(tolm, k);
 #endif
         }
     }
+
+    if(tolm > 1)
+    {
+        std::cerr << "WARNING near_check has been permissive with a tolerance multiplier equal to "
+                  << tolm << std::endl;
+    }
 }
 
 template <typename T>
diff --git a/clients/tests/test_csrmv.yaml b/clients/tests/test_csrmv.yaml
index d2883ddc..edfd12ed 100644
--- a/clients/tests/test_csrmv.yaml
+++ b/clients/tests/test_csrmv.yaml
@@ -101,7 +101,6 @@ Tests:
   baseA: [rocsparse_index_base_zero, rocsparse_index_base_one]
   matrix: [rocsparse_matrix_random]
   algo: [0, 1]
-  tolm: [2]
 
 - name: csrmv_file
   category: quick
diff --git a/clients/tests/test_gebsrmv.yaml b/clients/tests/test_gebsrmv.yaml
index 8b6eb47e..d85147c9 100644
--- a/clients/tests/test_gebsrmv.yaml
+++ b/clients/tests/test_gebsrmv.yaml
@@ -137,7 +137,6 @@ Tests:
   baseA: [rocsparse_index_base_zero]
   matrix: [rocsparse_matrix_file_rocalution]
   filename: [nos7]
-  tolm: [2]
 
 #
 # More tests for code coverage