diff --git a/tests/impl.cpp b/tests/impl.cpp
index 8ff57331..555185b8 100644
--- a/tests/impl.cpp
+++ b/tests/impl.cpp
@@ -8453,6 +8453,43 @@ result_t test_mm_dp_pd(const SSE2NEONTestImpl &impl, uint32_t iter)
     return TEST_SUCCESS;
 }
 
+// Compare the four float lanes of `a` against the expected values f0..f3.
+// Every per-lane absolute difference must be strictly below `epsilon` for
+// the function to reach `return TEST_SUCCESS`.
+// NOTE(review): ASSERT_RETURN is defined elsewhere; presumably it returns a
+// failure result when its condition is false -- confirm.
+result_t _validateFloatEpsilon(__m128 a,
+                               float f0,
+                               float f1,
+                               float f2,
+                               float f3,
+                               float epsilon)
+{
+    // Per-lane error. A lane counts as an exact match (error 0) when both
+    // values are NaN or when they compare equal. The equality test covers
+    // zeros of either sign and also equal infinities, whose subtraction
+    // would otherwise yield NaN and fail the `< epsilon` check below.
+    const auto laneError = [](float actual, float expected) -> float {
+        if (std::isnan(actual) && std::isnan(expected))
+            return 0.0f;
+        if (actual == expected)
+            return 0.0f;
+        return fabsf(actual - expected);
+    };
+
+    const auto lanes = vreinterpretq_f32_m128(a);
+    const float df0 = laneError(vgetq_lane_f32(lanes, 0), f0);
+    const float df1 = laneError(vgetq_lane_f32(lanes, 1), f1);
+    const float df2 = laneError(vgetq_lane_f32(lanes, 2), f2);
+    const float df3 = laneError(vgetq_lane_f32(lanes, 3), f3);
+
+    ASSERT_RETURN(df0 < epsilon);
+    ASSERT_RETURN(df1 < epsilon);
+    ASSERT_RETURN(df2 < epsilon);
+    ASSERT_RETURN(df3 < epsilon);
+    return TEST_SUCCESS;
+}
+
 #define MM_DP_PS_TEST_CASE_WITH(IMM) \
     do { \
         const float *_a = impl.mTestFloatPointer1; \
@@ -8468,7 +8505,7 @@ result_t test_mm_dp_pd(const SSE2NEONTestImpl &impl, uint32_t iter)
         for (size_t i = 0; i < 4; i++) \
             r[i] = (imm & (1 << i)) ? sum : 0; \
         /* the epsilon has to be large enough, otherwise test suite fails. */ \
-        if (validateFloatEpsilon(out, r[0], r[1], r[2], r[3], 2050.0f) != \
+        if (_validateFloatEpsilon(out, r[0], r[1], r[2], r[3], 2050.0f) != \
             TEST_SUCCESS) \
             return TEST_FAIL; \
     } while (0)