diff --git a/tests/impl.cpp b/tests/impl.cpp
index 8ff57331..555185b8 100644
--- a/tests/impl.cpp
+++ b/tests/impl.cpp
@@ -8453,6 +8453,50 @@ result_t test_mm_dp_pd(const SSE2NEONTestImpl &impl, uint32_t iter)
     return TEST_SUCCESS;
 }
 
+// Compares the four float lanes of `a` against their expected values and
+// requires every absolute difference to be strictly below `epsilon`.
+//
+//   a        vector under test
+//   f0..f3   expected values for lanes 0..3
+//   epsilon  exclusive upper bound on each lane's absolute difference
+//
+// Returns TEST_SUCCESS when all lanes pass; the first lane that does not is
+// reported through ASSERT_RETURN.
+result_t _validateFloatEpsilon(__m128 a,
+                              float f0,
+                              float f1,
+                              float f2,
+                              float f3,
+                              float epsilon)
+{
+    // vgetq_lane_f32 needs a constant lane index, so each lane is read
+    // explicitly rather than in a loop or through a cast of &a to float *.
+    const float actual[4] = {
+        vgetq_lane_f32(vreinterpretq_f32_m128(a), 0),
+        vgetq_lane_f32(vreinterpretq_f32_m128(a), 1),
+        vgetq_lane_f32(vreinterpretq_f32_m128(a), 2),
+        vgetq_lane_f32(vreinterpretq_f32_m128(a), 3),
+    };
+    const float expected[4] = {f0, f1, f2, f3};
+
+    for (int i = 0; i < 4; i++) {
+        float diff = fabsf(actual[i] - expected[i]);
+
+        // The subtraction above yields NaN when either operand is NaN and
+        // also when both operands are the same infinity, and a NaN
+        // difference can never compare below epsilon. Define the difference
+        // as zero when both values are NaN or when they compare equal
+        // (zeros of either sign, matching infinities, identical finites).
+        if ((std::isnan(actual[i]) && std::isnan(expected[i])) ||
+            actual[i] == expected[i]) {
+            diff = 0;
+        }
+
+        ASSERT_RETURN(diff < epsilon);
+    }
+    return TEST_SUCCESS;
+}
+
 #define MM_DP_PS_TEST_CASE_WITH(IMM)                                          \
     do {                                                                      \
         const float *_a = impl.mTestFloatPointer1;                            \
@@ -8468,7 +8512,7 @@ result_t test_mm_dp_pd(const SSE2NEONTestImpl &impl, uint32_t iter)
         for (size_t i = 0; i < 4; i++)                                        \
             r[i] = (imm & (1 << i)) ? sum : 0;                                \
         /* the epsilon has to be large enough, otherwise test suite fails. */ \
-        if (validateFloatEpsilon(out, r[0], r[1], r[2], r[3], 2050.0f) !=     \
+        if (_validateFloatEpsilon(out, r[0], r[1], r[2], r[3], 2050.0f) !=     \
             TEST_SUCCESS)                                                     \
             return TEST_FAIL;                                                 \
     } while (0)