Skip to content

Commit

Permalink
Upgrade xsimd to a48ab430d4b84ecd5449180ee1c6d2eed67c4191
Browse files Browse the repository at this point in the history
Fix #2164
  • Loading branch information
serge-sans-paille committed Dec 21, 2023
1 parent c4f70cf commit 4c02f5e
Show file tree
Hide file tree
Showing 60 changed files with 5,856 additions and 385 deletions.
61 changes: 61 additions & 0 deletions pythran/xsimd/arch/generic/xsimd_generic_arithmetic.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#define XSIMD_GENERIC_ARITHMETIC_HPP

#include <complex>
#include <limits>
#include <type_traits>

#include "./xsimd_generic_details.hpp"
Expand Down Expand Up @@ -126,6 +127,20 @@ namespace xsimd
return { res_r, res_i };
}

// hadd
/// Horizontal add: returns the sum of all lanes of `self` as a scalar T.
///
/// Generic fallback: the batch is stored to an aligned stack buffer and the
/// lanes are accumulated one by one into a T accumulator.
template <class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value, void>::type*/>
inline T hadd(batch<T, A> const& self, requires_arch<generic>) noexcept
{
    alignas(A::alignment()) T lanes[batch<T, A>::size];
    self.store_aligned(lanes);

    T total = 0;
    const T* const lanes_end = lanes + batch<T, A>::size;
    for (const T* lane = lanes; lane != lanes_end; ++lane)
    {
        total += *lane;
    }
    return total;
}

// incr
template <class A, class T>
inline batch<T, A> incr(batch<T, A> const& self, requires_arch<generic>) noexcept
Expand All @@ -149,12 +164,45 @@ namespace xsimd
self, other);
}

// rotl
/// Rotates every lane of `self` left by `other` bits.
///
/// The rotation is built from two shifts whose amounts add up to the full
/// bit width of T. `std::numeric_limits<T>::digits` does not count the sign
/// bit, so `is_signed` is added back; otherwise signed lanes would be rotated
/// modulo (width - 1) instead of modulo width.
///
/// NOTE(review): for signed T the `>>` on the batch is presumably an
/// arithmetic shift, which would smear the sign bit into the rotated result;
/// confirm whether signed rotation is expected to be supported here.
template <class A, class T, class STy>
inline batch<T, A> rotl(batch<T, A> const& self, STy other, requires_arch<generic>) noexcept
{
    // int + bool -> int, so the type of the constant is the same as before.
    constexpr auto bits = std::numeric_limits<T>::digits + std::numeric_limits<T>::is_signed;
    return (self << other) | (self >> (bits - other));
}

// rotr
/// Rotates every lane of `self` right by `other` bits.
///
/// The rotation is built from two shifts whose amounts add up to the full
/// bit width of T. `std::numeric_limits<T>::digits` does not count the sign
/// bit, so `is_signed` is added back; otherwise signed lanes would be rotated
/// modulo (width - 1) instead of modulo width.
///
/// NOTE(review): for signed T the `>>` on the batch is presumably an
/// arithmetic shift, which would smear the sign bit into the rotated result;
/// confirm whether signed rotation is expected to be supported here.
template <class A, class T, class STy>
inline batch<T, A> rotr(batch<T, A> const& self, STy other, requires_arch<generic>) noexcept
{
    // int + bool -> int, so the type of the constant is the same as before.
    constexpr auto bits = std::numeric_limits<T>::digits + std::numeric_limits<T>::is_signed;
    return (self >> other) | (self << (bits - other));
}

// sadd
/// Saturated add for float batches.
///
/// Floating point has no saturated arithmetic, so this simply forwards to
/// the regular `add`.
template <class A>
inline batch<float, A> sadd(batch<float, A> const& self, batch<float, A> const& other, requires_arch<generic>) noexcept
{
    const auto plain_sum = add(self, other);
    return plain_sum;
}
/// Saturated add for integral batches: lane-wise `self + other`, with one
/// operand clamped beforehand so the addition stays within the range of T.
template <class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value, void>::type*/>
inline batch<T, A> sadd(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept
{
    if (!std::is_signed<T>::value)
    {
        // Unsigned: never add more than the headroom left above `self`.
        const auto headroom = std::numeric_limits<T>::max() - self;
        const auto increment = min(headroom, other);
        return self + increment;
    }

    // Signed: shift each lane of `other` down by (width - 1) bits; the result
    // is reinterpreted below as a per-lane selector (expected to be set for
    // negative lanes, assuming an arithmetic shift).
    auto sign_fill = (other >> (8 * sizeof(T) - 1));
    // Candidate used when the selector is clear: cap `self` from above.
    auto capped_high = min(std::numeric_limits<T>::max() - other, self);
    // Candidate used when the selector is set: cap `self` from below.
    auto capped_low = max(std::numeric_limits<T>::min() - other, self);
    return other + select(batch_bool<T, A>(sign_fill.data), capped_low, capped_high);
}
template <class A>
inline batch<double, A> sadd(batch<double, A> const& self, batch<double, A> const& other, requires_arch<generic>) noexcept
{
Expand All @@ -167,6 +215,19 @@ namespace xsimd
{
return sub(self, other); // no saturated arithmetic on floating point numbers
}
/// Saturated subtract for integral batches: lane-wise `self - other`,
/// clamped to the range of T.
///
/// The signed path clamps `self` directly instead of computing
/// `sadd(self, -other)`: negating a lane equal to
/// `std::numeric_limits<T>::min()` wraps back to the same value, so the
/// negate-then-add form saturates in the wrong direction for that input
/// (e.g. `0 - MIN` would yield MIN instead of MAX).
template <class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value, void>::type*/>
inline batch<T, A> ssub(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept
{
    if (std::is_signed<T>::value)
    {
        const auto other_is_negative = other < batch<T, A>(T(0));
        // other < 0: the result can only overflow upwards, which happens when
        // self > MAX + other. MAX + other is representable for negative other.
        const auto capped_from_above = min(std::numeric_limits<T>::max() + other, self);
        // other >= 0: the result can only underflow, which happens when
        // self < MIN + other. MIN + other is representable for non-negative other.
        const auto capped_from_below = max(std::numeric_limits<T>::min() + other, self);
        // Only the candidate matching the sign of `other` is selected; the
        // other candidate may hold a wrapped bound and is discarded.
        return select(other_is_negative, capped_from_above, capped_from_below) - other;
    }
    else
    {
        // Unsigned: never subtract more than `self` itself, so the result
        // bottoms out at zero.
        const auto diff = min(self, other);
        return self - diff;
    }
}
template <class A>
inline batch<double, A> ssub(batch<double, A> const& self, batch<double, A> const& other, requires_arch<generic>) noexcept
{
Expand Down
12 changes: 12 additions & 0 deletions pythran/xsimd/arch/generic/xsimd_generic_complex.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,18 @@ namespace xsimd
{
return batch_bool<T, A>(isnan(self.real()) || isnan(self.imag()));
}

/// isinf for complex batches: a lane is flagged when its real part or its
/// imaginary part is infinite.
template <class A, class T>
inline batch_bool<T, A> isinf(batch<std::complex<T>, A> const& self, requires_arch<generic>) noexcept
{
    const auto real_is_inf = isinf(self.real());
    const auto imag_is_inf = isinf(self.imag());
    return batch_bool<T, A>(real_is_inf || imag_is_inf);
}

/// isfinite for complex batches: a lane is flagged only when both its real
/// part and its imaginary part are finite.
template <class A, class T>
inline batch_bool<T, A> isfinite(batch<std::complex<T>, A> const& self, requires_arch<generic>) noexcept
{
    const auto real_is_finite = isfinite(self.real());
    const auto imag_is_finite = isfinite(self.imag());
    return batch_bool<T, A>(real_is_finite && imag_is_finite);
}
}
}

Expand Down
34 changes: 33 additions & 1 deletion pythran/xsimd/arch/generic/xsimd_generic_details.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,9 @@ namespace xsimd
template <class T, class A>
inline batch_bool<T, A> is_odd(batch<T, A> const& self) noexcept;
template <class T, class A>
inline batch_bool<T, A> isinf(batch<T, A> const& self) noexcept;
inline typename batch<T, A>::batch_bool_type isinf(batch<T, A> const& self) noexcept;
template <class T, class A>
inline typename batch<T, A>::batch_bool_type isfinite(batch<T, A> const& self) noexcept;
template <class T, class A>
inline typename batch<T, A>::batch_bool_type isnan(batch<T, A> const& self) noexcept;
template <class T, class A>
Expand Down Expand Up @@ -178,6 +180,36 @@ namespace xsimd
{
return bitwise_cast<int64_t>(self);
}

// Generic uint32_t -> float conversion. The defaulted template argument is
// only well-formed when an int32_t -> float fast_cast can be called for
// arch A, so this overload exists only in that case.
template <class A, class _ = decltype(fast_cast(std::declval<batch<int32_t, A> const&>(), std::declval<batch<float, A> const&>(), A {}))>
inline batch<float, A> fast_cast(batch<uint32_t, A> const& v, batch<float, A> const&, requires_arch<generic>) noexcept
{
    // Technique from
    // https://stackoverflow.com/questions/34066228/how-to-perform-uint32-float-conversion-with-sse
    const batch<uint32_t, A> low_half_mask(0xFFFF);
    const batch<float, A> two_pow_16(65536.0f);

    // Split each lane into its 16 high and 16 low bits. Each half fits in an
    // int32_t and converts to float without rounding.
    const auto high_half = batch_cast<float>(batch_cast<int32_t>(v >> 16));
    const auto low_half = batch_cast<float>(batch_cast<int32_t>(v & low_half_mask));

    // Scaling by 2^16 does not round either.
    const auto high_scaled = two_pow_16 * high_half;
    // Rounding may occur in this final add; mul and add may fuse to fma for
    // haswell and newer.
    return high_scaled + low_half;
}

// Generic float -> uint32_t conversion. The defaulted template argument is
// only well-formed when a float -> int32_t fast_cast can be called for
// arch A, so this overload exists only in that case.
template <class A, class _ = decltype(fast_cast(std::declval<batch<float, A> const&>(), std::declval<batch<int32_t, A> const&>(), A {}))>
inline batch<uint32_t, A> fast_cast(batch<float, A> const& v, batch<uint32_t, A> const&, requires_arch<generic>) noexcept
{
    const batch<float, A> two_pow_31(1u << 31);

    // Lanes at or above 2^31 do not fit in an int32_t.
    const auto needs_offset = v >= two_pow_31;

    // Lanes below 2^31: convert through int32_t directly.
    const auto direct = bitwise_cast<float>(batch_cast<int32_t>(v));

    // Lanes at or above 2^31: subtract 2^31 before the signed conversion,
    // then put the top bit back with an xor on the integer result.
    const auto offset = bitwise_cast<float>(
        batch_cast<int32_t>(v - two_pow_31)
        ^ batch<int32_t, A>(1u << 31));

    // Both candidates are carried as float bit patterns so that a single
    // select can pick per lane before reinterpreting as uint32_t.
    return bitwise_cast<uint32_t>(select(needs_offset, offset, direct));
}
}

namespace detail
Expand Down
22 changes: 12 additions & 10 deletions pythran/xsimd/arch/generic/xsimd_generic_math.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,12 +95,12 @@ namespace xsimd
// bitofsign for float batches: bitwise-ANDs `self` with a constant taken
// from the `constants` namespace and returns the result.
// NOTE(review): two return statements follow each other below. Only the
// first one (minuszero) can execute; the second (signmask) is unreachable.
// This looks like the removed/added line pair of a rendered diff - confirm
// which of the two is meant to remain.
template <class A>
inline batch<float, A> bitofsign(batch<float, A> const& self, requires_arch<generic>) noexcept
{
return self & constants::minuszero<batch<float, A>>();
return self & constants::signmask<batch<float, A>>();
}
// bitofsign for double batches: bitwise-ANDs `self` with a constant taken
// from the `constants` namespace and returns the result.
// NOTE(review): two return statements follow each other below. Only the
// first one (minuszero) can execute; the second (signmask) is unreachable.
// This looks like the removed/added line pair of a rendered diff - confirm
// which of the two is meant to remain.
template <class A>
inline batch<double, A> bitofsign(batch<double, A> const& self, requires_arch<generic>) noexcept
{
return self & constants::minuszero<batch<double, A>>();
return self & constants::signmask<batch<double, A>>();
}

// bitwise_cast
Expand Down Expand Up @@ -470,16 +470,18 @@ namespace xsimd
batch_type x = abs(self);
auto test0 = self < batch_type(0.);
batch_type r1(0.);
auto test1 = 3.f * x < 2.f;
batch_type z = x / (batch_type(1.) + x);
if (any(3.f * x < 2.f))
if (any(test1))
{
r1 = detail::erf_kernel<batch_type>::erfc3(z);
if (all(test1))
return select(test0, batch_type(2.) - r1, r1);
}
else
{
z -= batch_type(0.4f);
r1 = exp(-x * x) * detail::erf_kernel<batch_type>::erfc2(z);
}

z -= batch_type(0.4f);
batch_type r2 = exp(-x * x) * detail::erf_kernel<batch_type>::erfc2(z);
r1 = select(test1, r1, r2);
#ifndef XSIMD_NO_INFINITIES
r1 = select(x == constants::infinity<batch_type>(), batch_type(0.), r1);
#endif
Expand Down Expand Up @@ -1849,7 +1851,7 @@ namespace xsimd
{
using U = as_integer_t<float>;
return kernel::detail::apply_transform<U>([](float x) noexcept -> U
{ return std::lroundf(x); },
{ return std::nearbyintf(x); },
self);
}

Expand All @@ -1859,7 +1861,7 @@ namespace xsimd
{
using U = as_integer_t<double>;
return kernel::detail::apply_transform<U>([](double x) noexcept -> U
{ return std::llround(x); },
{ return std::nearbyint(x); },
self);
}

Expand Down
Loading

0 comments on commit 4c02f5e

Please sign in to comment.