boostorg · mborland · Aug 5, 2022 · Aug 6, 2022 · Aug 6, 2022 · Aug 14, 2022
diff --git a/include/boost/math/special_functions/fast_float_distance.hpp b/include/boost/math/special_functions/fast_float_distance.hpp
@@ -0,0 +1,132 @@
+//  (C) Copyright Matt Borland 2022.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef BOOST_MATH_SF_FAST_FLOAT_DISTANCE
+#define BOOST_MATH_SF_FAST_FLOAT_DISTANCE
+
+#include <boost/math/special_functions/next.hpp>
+#include <boost/math/tools/throw_exception.hpp>
+#include <stdexcept>
+#include <limits>
+
+#if defined(BOOST_MATH_USE_FLOAT128) && !defined(BOOST_MATH_STANDALONE)
+#include <boost/multiprecision/float128.hpp>
+#include <boost/multiprecision/detail/standalone_config.hpp>
+#define BOOST_MATH_USE_FAST_FLOAT128
+#elif defined(BOOST_MATH_USE_FLOAT128) && defined(BOOST_MATH_STANDALONE)
+#  if __has_include(<quadmath.h>)
+#    include <quadmath.h>
+#    define BOOST_MATH_USE_FAST_STANDALONE_FLOAT128
+#  endif
+#endif
+
+namespace boost { namespace math { 
+
+// https://randomascii.wordpress.com/2012/01/23/stupid-float-tricks-2/
+// https://blog.regehr.org/archives/959
+// float overload: forwards to boost::math::float_distance(a, b) and returns
+// the result converted to std::int32_t.
+inline std::int32_t fast_float_distance(float a, float b)
+{
+    const std::int32_t distance = boost::math::float_distance(a, b);
+    return distance;
+}
+
+// double overload: forwards to boost::math::float_distance(a, b) and returns
+// the result converted to std::int64_t.
+inline std::int64_t fast_float_distance(double a, double b)
+{
+    const std::int64_t distance = boost::math::float_distance(a, b);
+    return distance;
+}
+
+#ifdef BOOST_MATH_USE_FAST_FLOAT128
+// Fast distance between two float128 values, computed from their bit patterns.
+//
+// Checks are performed in this order:
+//   1. if either argument is zero, 0 is returned;
+//   2. if either argument has a magnitude below 2 * BOOST_MP_QUAD_MIN, a
+//      std::domain_error is raised through BOOST_MATH_THROW_EXCEPTION;
+//   3. if either argument is not finite, a std::domain_error is raised.
+// Otherwise each bit pattern is re-encoded as an integer key whose ordering
+// matches the floating-point ordering (negative values map to negated
+// magnitudes) and key(b) - key(a) is returned. The subtraction is done in
+// unsigned arithmetic, so a distance too large for int128_type wraps around
+// instead of overflowing a signed integer.
+//
+// Declared inline because this is a function definition in a header.
+//
+// NOTE(review): returning 0 whenever one argument is zero is kept from the
+// original; the float/double overloads in this header forward every input to
+// boost::math::float_distance instead -- confirm the difference is intended.
+inline boost::multiprecision::int128_type fast_float_distance(boost::multiprecision::float128_type a, boost::multiprecision::float128_type b)
+{
+    using std::abs;
+    using std::isfinite;
+
+    using signed_bits   = boost::multiprecision::int128_type;
+    using unsigned_bits = boost::multiprecision::uint128_type;
+
+    constexpr boost::multiprecision::float128_type tol = 2 * BOOST_MP_QUAD_MIN;
+
+    // 0, very small, and large magnitude distances all need special handling
+    if (abs(a) == 0 || abs(b) == 0)
+    {
+        return 0;
+    }
+    else if (abs(a) < tol || abs(b) < tol)
+    {
+        BOOST_MATH_THROW_EXCEPTION(std::domain_error("special handling is required for tiny distances. Please use boost::math::float_distance for a slower but safe solution"));
+    }
+
+    if (!(isfinite)(a) || !(isfinite)(b))
+    {
+        BOOST_MATH_THROW_EXCEPTION(std::domain_error("Both arguments to fast_float_distance must be finite"));
+    }
+
+    static_assert(sizeof(signed_bits) == sizeof(boost::multiprecision::float128_type), "float128 is the wrong size");
+    static_assert(sizeof(unsigned_bits) == sizeof(boost::multiprecision::float128_type), "float128 is the wrong size");
+
+    // Floating-point values are stored as sign + magnitude. Subtracting the
+    // raw patterns is only meaningful when both signs agree, so negative
+    // patterns are first converted to the two's-complement encoding of
+    // -magnitude (sign_bit - bits, modulo 2^128).
+    const auto ordered_key = [](unsigned_bits bits) -> unsigned_bits
+    {
+        constexpr unsigned_bits sign_bit = static_cast<unsigned_bits>(1) << 127;
+        return (bits & sign_bit) != 0 ? static_cast<unsigned_bits>(sign_bit - bits) : bits;
+    };
+
+    unsigned_bits a_bits;
+    unsigned_bits b_bits;
+    std::memcpy(&a_bits, &a, sizeof(boost::multiprecision::float128_type));
+    std::memcpy(&b_bits, &b, sizeof(boost::multiprecision::float128_type));
+
+    return static_cast<signed_bits>(static_cast<unsigned_bits>(ordered_key(b_bits) - ordered_key(a_bits)));
+}
+
+#elif defined(BOOST_MATH_USE_FAST_STANDALONE_FLOAT128)
+// Fast distance between two __float128 values, computed from their bit
+// patterns (standalone build using libquadmath).
+//
+// Checks are performed in this order:
+//   1. if either argument is zero, 0 is returned;
+//   2. if either argument has a magnitude below tol, a std::domain_error is
+//      raised through BOOST_MATH_THROW_EXCEPTION;
+//   3. if either argument is infinite or NaN, a std::domain_error is raised.
+//      (The original test was inverted: it threw for every finite input.)
+// Otherwise each bit pattern is re-encoded as an integer key whose ordering
+// matches the floating-point ordering (negative values map to negated
+// magnitudes) and key(b) - key(a) is returned. The subtraction is done in
+// unsigned arithmetic, so a distance too large for __int128 wraps around
+// instead of overflowing a signed integer.
+//
+// Declared inline because this is a function definition in a header.
+//
+// NOTE(review): returning 0 whenever one argument is zero is kept from the
+// original; the float/double overloads in this header forward every input to
+// boost::math::float_distance instead -- confirm the difference is intended.
+inline __int128 fast_float_distance(__float128 a, __float128 b)
+{
+    // DBL_MIN^16 / 2^30, doubled. Assuming an IEEE binary64 double
+    // (DBL_MIN == 2^-1022) this is 2 * 2^-16382 -- TODO confirm this is the
+    // intended "twice the smallest normal __float128" threshold.
+    constexpr __float128 tol = 2 * static_cast<__float128>(1) * static_cast<__float128>(DBL_MIN) * static_cast<__float128>(DBL_MIN) * 
+                               static_cast<__float128>(DBL_MIN) * static_cast<__float128>(DBL_MIN) * static_cast<__float128>(DBL_MIN) * 
+                               static_cast<__float128>(DBL_MIN) * static_cast<__float128>(DBL_MIN) * static_cast<__float128>(DBL_MIN) * 
+                               static_cast<__float128>(DBL_MIN) * static_cast<__float128>(DBL_MIN) * static_cast<__float128>(DBL_MIN) * 
+                               static_cast<__float128>(DBL_MIN) * static_cast<__float128>(DBL_MIN) * static_cast<__float128>(DBL_MIN) * 
+                               static_cast<__float128>(DBL_MIN) * static_cast<__float128>(DBL_MIN) / 1073741824;
+
+    // 0, very small, and large magnitude distances all need special handling
+    if (::fabsq(a) == 0 || ::fabsq(b) == 0)
+    {
+        return 0;
+    }
+    else if (::fabsq(a) < tol || ::fabsq(b) < tol)
+    {
+        BOOST_MATH_THROW_EXCEPTION(std::domain_error("special handling is required for tiny distances. Please use boost::math::float_distance for a slower but safe solution"));
+    }
+
+    // Reject infinities and NaNs; only finite values have a usable bit distance.
+    if ((::isinfq)(a) || (::isnanq)(a) || (::isinfq)(b) || (::isnanq)(b))
+    {
+        BOOST_MATH_THROW_EXCEPTION(std::domain_error("Both arguments to fast_float_distance must be finite"));
+    }
+
+    // A message is supplied so the assertion also compiles before C++17.
+    static_assert(sizeof(__int128) == sizeof(__float128), "__float128 is the wrong size");
+    static_assert(sizeof(unsigned __int128) == sizeof(__float128), "__float128 is the wrong size");
+
+    // Floating-point values are stored as sign + magnitude. Subtracting the
+    // raw patterns is only meaningful when both signs agree, so negative
+    // patterns are first converted to the two's-complement encoding of
+    // -magnitude (sign_bit - bits, modulo 2^128).
+    const auto ordered_key = [](unsigned __int128 bits) -> unsigned __int128
+    {
+        constexpr unsigned __int128 sign_bit = static_cast<unsigned __int128>(1) << 127;
+        return (bits & sign_bit) != 0 ? static_cast<unsigned __int128>(sign_bit - bits) : bits;
+    };
+
+    unsigned __int128 a_bits;
+    unsigned __int128 b_bits;
+    std::memcpy(&a_bits, &a, sizeof(__float128));
+    std::memcpy(&b_bits, &b, sizeof(__float128));
+
+    return static_cast<__int128>(static_cast<unsigned __int128>(ordered_key(b_bits) - ordered_key(a_bits)));
+}
+#endif
+
+}} // Namespaces
+
+#endif
diff --git a/include/boost/math/special_functions/next.hpp b/include/boost/math/special_functions/next.hpp
@@ -16,9 +16,11 @@
 #include <boost/math/special_functions/sign.hpp>
 #include <boost/math/special_functions/trunc.hpp>
 #include <boost/math/tools/traits.hpp>
+#include <boost/math/tools/config.hpp>
 #include <type_traits>
 #include <cfloat>
-
+#include <cstdint>
+#include <cstring>
 
 #if !defined(_CRAYC) && !defined(__CUDACC__) && (!defined(__GNUC__) || (__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ > 3)))
 #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || defined(__SSE2__)
@@ -717,6 +719,103 @@ typename tools::promote_args<T, U>::type float_distance(const T& a, const U& b)
    return boost::math::float_distance(a, b, policies::policy<>());
 }
 
+// https://randomascii.wordpress.com/2012/01/23/stupid-float-tricks-2/
+// https://blog.regehr.org/archives/959
+// Fast path for float: the distance is obtained by subtracting integer keys
+// derived from the bit patterns of a and b.
+//
+// * If either argument has a magnitude below 2 * numeric_limits<float>::min()
+//   (this includes zero), the call is forwarded to the policy-based
+//   float_distance and its result is cast to std::int32_t.
+// * If either argument is not finite, the value produced by
+//   policies::raise_domain_error<float> is returned (converted to std::int32_t).
+// * Otherwise each bit pattern is re-encoded so that integer order matches
+//   floating-point order (negative values map to negated magnitudes) and
+//   key(b) - key(a) is returned. This also covers arguments of opposite sign,
+//   which the raw-pattern subtraction handled incorrectly. The subtraction is
+//   unsigned, so a distance that does not fit in std::int32_t wraps around.
+inline std::int32_t float_distance(float a, float b)
+{
+   using std::abs;
+   constexpr auto tol = 2 * (std::numeric_limits<float>::min)();
+
+   // 0, very small, and large magnitude distances all need special handling
+   if (abs(a) < tol || abs(b) < tol)
+   {
+      return static_cast<std::int32_t>(float_distance(a, b, policies::policy<>()));
+   }
+
+   static const char* function = "float_distance<%1%>(%1%, %1%)";
+   if(!(boost::math::isfinite)(a))
+   {
+      return policies::raise_domain_error<float>(
+         function,
+         "Argument a must be finite, but got %1%", a, policies::policy<>());
+   }
+   if(!(boost::math::isfinite)(b))
+   {
+      return policies::raise_domain_error<float>(
+         function,
+         "Argument b must be finite, but got %1%", b, policies::policy<>());
+   }
+
+   static_assert(sizeof(float) == sizeof(std::uint32_t), "float is incorrect size.");
+
+   // Floats are stored as sign + magnitude. Negative patterns are converted to
+   // the two's-complement encoding of -magnitude (sign_bit - bits, modulo 2^32)
+   // so that a plain subtraction of keys is valid for any combination of signs.
+   const auto ordered_key = [](std::uint32_t bits) -> std::uint32_t
+   {
+      constexpr std::uint32_t sign_bit = static_cast<std::uint32_t>(1) << 31;
+      return (bits & sign_bit) != 0 ? static_cast<std::uint32_t>(sign_bit - bits) : bits;
+   };
+
+   std::uint32_t a_bits;
+   std::uint32_t b_bits;
+   std::memcpy(&a_bits, &a, sizeof(float));
+   std::memcpy(&b_bits, &b, sizeof(float));
+
+   return static_cast<std::int32_t>(static_cast<std::uint32_t>(ordered_key(b_bits) - ordered_key(a_bits)));
+}
+
+// Fast path for double: the distance is obtained by subtracting integer keys
+// derived from the bit patterns of a and b.
+//
+// * If either argument has a magnitude below 2 * numeric_limits<double>::min()
+//   (this includes zero), the call is forwarded to the policy-based
+//   float_distance and its result is cast to std::int64_t.
+// * If either argument is not finite, the value produced by
+//   policies::raise_domain_error<double> is returned (converted to std::int64_t).
+// * Otherwise each bit pattern is re-encoded so that integer order matches
+//   floating-point order (negative values map to negated magnitudes) and
+//   key(b) - key(a) is returned. This also covers arguments of opposite sign,
+//   which the raw-pattern subtraction handled incorrectly. The subtraction is
+//   unsigned, so a distance that does not fit in std::int64_t wraps around.
+inline std::int64_t float_distance(double a, double b)
+{
+   using std::abs;
+   constexpr auto tol = 2 * (std::numeric_limits<double>::min)();
+
+   // 0, very small, and large magnitude distances all need special handling
+   if (abs(a) < tol || abs(b) < tol)
+   {
+      return static_cast<std::int64_t>(float_distance(a, b, policies::policy<>()));
+   }
+
+   static const char* function = "float_distance<%1%>(%1%, %1%)";
+   if(!(boost::math::isfinite)(a))
+   {
+      return policies::raise_domain_error<double>(
+         function,
+         "Argument a must be finite, but got %1%", a, policies::policy<>());
+   }
+   if(!(boost::math::isfinite)(b))
+   {
+      return policies::raise_domain_error<double>(
+         function,
+         "Argument b must be finite, but got %1%", b, policies::policy<>());
+   }
+
+   static_assert(sizeof(double) == sizeof(std::uint64_t), "double is incorrect size.");
+
+   // Doubles are stored as sign + magnitude. Negative patterns are converted to
+   // the two's-complement encoding of -magnitude (sign_bit - bits, modulo 2^64)
+   // so that a plain subtraction of keys is valid for any combination of signs.
+   const auto ordered_key = [](std::uint64_t bits) -> std::uint64_t
+   {
+      constexpr std::uint64_t sign_bit = static_cast<std::uint64_t>(1) << 63;
+      return (bits & sign_bit) != 0 ? static_cast<std::uint64_t>(sign_bit - bits) : bits;
+   };
+
+   std::uint64_t a_bits;
+   std::uint64_t b_bits;
+   std::memcpy(&a_bits, &a, sizeof(double));
+   std::memcpy(&b_bits, &b, sizeof(double));
+
+   return static_cast<std::int64_t>(static_cast<std::uint64_t>(ordered_key(b_bits) - ordered_key(a_bits)));
+}
+
 namespace detail{
 
 template <class T, class Policy>

diff --git a/reporting/performance/new_next_performance.cpp b/reporting/performance/new_next_performance.cpp
@@ -0,0 +1,126 @@
+//  (C) Copyright Matt Borland 2022.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <boost/math/special_functions/next.hpp>
+#include <benchmark/benchmark.h>
+
+// Benchmark body: times boost::math::float_distance between the value 2 and
+// the value boost::math::float_advance yields from 2 with a step count of
+// state.range(0). The same range value is reported as the complexity N.
+template <typename T>
+void float_distance(benchmark::State& state)
+{
+    const auto steps = state.range(0);
+
+    T start = 2;
+    T stop = boost::math::float_advance(start, static_cast<int>(steps));
+
+    for (auto _ : state)
+    {
+        // DoNotOptimize keeps the call from being discarded by the optimizer.
+        benchmark::DoNotOptimize(boost::math::float_distance(start, stop));
+    }
+
+    state.SetComplexityN(steps);
+}
+
+BENCHMARK_TEMPLATE(float_distance, float)->RangeMultiplier(2)->Range(1 << 1, 1 << 14)->Complexity()->UseRealTime();
+BENCHMARK_TEMPLATE(float_distance, double)->RangeMultiplier(2)->Range(1 << 1, 1 << 14)->Complexity()->UseRealTime();
+
+BENCHMARK_MAIN();
+
+/*
+Run on Apple M1 Pro Arch using Apple Clang 14.0.0 (15OCT22)
+
+Original performance (Boost 1.80.0):
+
+Unable to determine clock rate from sysctl: hw.cpufrequency: No such file or directory
+This does not affect benchmark measurements, only the metadata output.
+2022-10-15T15:24:07-07:00
+Running ./new_next_performance
+Run on (10 X 24.0916 MHz CPU s)
+CPU Caches:
+  L1 Data 64 KiB
+  L1 Instruction 128 KiB
+  L2 Unified 4096 KiB (x10)
+Load Average: 1.86, 2.53, 5.83
+---------------------------------------------------------------------------------
+Benchmark                                       Time             CPU   Iterations
+---------------------------------------------------------------------------------
+float_distance<float>/2/real_time            61.4 ns         61.4 ns      9074469
+float_distance<float>/4/real_time            61.7 ns         61.7 ns     11384150
+float_distance<float>/8/real_time            61.4 ns         61.4 ns     10814604
+float_distance<float>/16/real_time           61.7 ns         61.7 ns     11348376
+float_distance<float>/32/real_time           61.4 ns         61.4 ns     11387167
+float_distance<float>/64/real_time           61.6 ns         61.6 ns     11131932
+float_distance<float>/128/real_time          61.4 ns         61.4 ns     11382029
+float_distance<float>/256/real_time          61.4 ns         61.4 ns     11307649
+float_distance<float>/512/real_time          61.4 ns         61.4 ns     11376048
+float_distance<float>/1024/real_time         61.4 ns         61.4 ns     11355748
+float_distance<float>/2048/real_time         61.8 ns         61.8 ns     11373776
+float_distance<float>/4096/real_time         61.4 ns         61.4 ns     11382368
+float_distance<float>/8192/real_time         61.4 ns         61.4 ns     11353453
+float_distance<float>/16384/real_time        61.4 ns         61.4 ns     11378298
+float_distance<float>/real_time_BigO        61.48 (1)       61.47 (1)
+float_distance<float>/real_time_RMS             0 %             0 %
+float_distance<double>/2/real_time           55.6 ns         55.6 ns     12580218
+float_distance<double>/4/real_time           55.6 ns         55.6 ns     12577835
+float_distance<double>/8/real_time           55.6 ns         55.6 ns     12564909
+float_distance<double>/16/real_time          56.2 ns         56.2 ns     12554909
+float_distance<double>/32/real_time          56.0 ns         56.0 ns     12544381
+float_distance<double>/64/real_time          55.6 ns         55.6 ns     12566488
+float_distance<double>/128/real_time         55.6 ns         55.6 ns     12499581
+float_distance<double>/256/real_time         55.6 ns         55.6 ns     12565661
+float_distance<double>/512/real_time         56.1 ns         56.1 ns     12550023
+float_distance<double>/1024/real_time        55.8 ns         55.8 ns     12568603
+float_distance<double>/2048/real_time        55.6 ns         55.6 ns     12546049
+float_distance<double>/4096/real_time        55.6 ns         55.6 ns     12528525
+float_distance<double>/8192/real_time        55.9 ns         55.9 ns     12563030
+float_distance<double>/16384/real_time       56.0 ns         56.0 ns     12447644
+float_distance<double>/real_time_BigO       55.78 (1)       55.78 (1)
+float_distance<double>/real_time_RMS            0 %             0 %
+
+New performance:
+
+Unable to determine clock rate from sysctl: hw.cpufrequency: No such file or directory
+This does not affect benchmark measurements, only the metadata output.
+2022-10-15T15:31:37-07:00
+Running ./new_next_performance
+Run on (10 X 24.122 MHz CPU s)
+CPU Caches:
+  L1 Data 64 KiB
+  L1 Instruction 128 KiB
+  L2 Unified 4096 KiB (x10)
+Load Average: 2.12, 2.17, 4.26
+---------------------------------------------------------------------------------
+Benchmark                                       Time             CPU   Iterations
+---------------------------------------------------------------------------------
+float_distance<float>/2/real_time            15.8 ns         15.8 ns     42162717
+float_distance<float>/4/real_time            15.9 ns         15.9 ns     44213877
+float_distance<float>/8/real_time            15.8 ns         15.8 ns     43972542
+float_distance<float>/16/real_time           15.8 ns         15.8 ns     44209456
+float_distance<float>/32/real_time           15.8 ns         15.8 ns     44200244
+float_distance<float>/64/real_time           15.8 ns         15.8 ns     44239293
+float_distance<float>/128/real_time          15.8 ns         15.8 ns     44171202
+float_distance<float>/256/real_time          15.8 ns         15.8 ns     44241507
+float_distance<float>/512/real_time          15.9 ns         15.8 ns     44230034
+float_distance<float>/1024/real_time         15.8 ns         15.8 ns     44241554
+float_distance<float>/2048/real_time         15.8 ns         15.8 ns     44220802
+float_distance<float>/4096/real_time         15.8 ns         15.8 ns     44220441
+float_distance<float>/8192/real_time         15.9 ns         15.9 ns     44213994
+float_distance<float>/16384/real_time        15.8 ns         15.8 ns     44215413
+float_distance<float>/real_time_BigO        15.83 (1)       15.83 (1)
+float_distance<float>/real_time_RMS             0 %             0 %
+float_distance<double>/2/real_time           15.5 ns         15.5 ns     45098165
+float_distance<double>/4/real_time           15.6 ns         15.6 ns     45065465
+float_distance<double>/8/real_time           15.5 ns         15.5 ns     45058733
+float_distance<double>/16/real_time          15.8 ns         15.7 ns     45078404
+float_distance<double>/32/real_time          15.5 ns         15.5 ns     44832734
+float_distance<double>/64/real_time          15.5 ns         15.5 ns     45077303
+float_distance<double>/128/real_time         15.5 ns         15.5 ns     45067255
+float_distance<double>/256/real_time         15.5 ns         15.5 ns     45073844
+float_distance<double>/512/real_time         15.6 ns         15.6 ns     45109342
+float_distance<double>/1024/real_time        15.5 ns         15.5 ns     44845180
+float_distance<double>/2048/real_time        15.5 ns         15.5 ns     45051846
+float_distance<double>/4096/real_time        15.5 ns         15.5 ns     45064317
+float_distance<double>/8192/real_time        15.5 ns         15.5 ns     45115653
+float_distance<double>/16384/real_time       15.5 ns         15.5 ns     45067642
+float_distance<double>/real_time_BigO       15.54 (1)       15.54 (1)
+float_distance<double>/real_time_RMS            0 %             0 %
+*/
diff --git a/test/Jamfile.v2 b/test/Jamfile.v2
@@ -527,6 +527,7 @@ test-suite special_fun :
    [ run test_ldouble_simple.cpp ../../test/build//boost_unit_test_framework  ]
    # Needs to run in release mode, as it's rather slow:
    [ run test_next.cpp pch ../../test/build//boost_unit_test_framework : : : release  ]
+   [ run test_fast_float_distance.cpp ../../test/build//boost_unit_test_framework : : : release [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <linkflags>"-Bstatic -lquadmath -Bdynamic" : <build>no ] ]
    [ run test_next_decimal.cpp pch ../../test/build//boost_unit_test_framework : : : release  ]
    [ run test_owens_t.cpp ../../test/build//boost_unit_test_framework  ]
    [ run test_polygamma.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework  ]