Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Upgrade xsimd to a48ab430d4b84ecd5449180ee1c6d2eed67c4191 #2166

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 61 additions & 0 deletions pythran/xsimd/arch/generic/xsimd_generic_arithmetic.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#define XSIMD_GENERIC_ARITHMETIC_HPP

#include <complex>
#include <limits>
#include <type_traits>

#include "./xsimd_generic_details.hpp"
Expand Down Expand Up @@ -126,6 +127,20 @@ namespace xsimd
return { res_r, res_i };
}

// hadd
// Generic horizontal add: the batch is stored into an aligned scratch array
// and every lane is accumulated with scalar additions.
template <class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value, void>::type*/>
inline T hadd(batch<T, A> const& self, requires_arch<generic>) noexcept
{
    alignas(A::alignment()) T lanes[batch<T, A>::size];
    self.store_aligned(lanes);

    T total = 0;
    const T* const last = lanes + batch<T, A>::size;
    for (const T* lane = lanes; lane != last; ++lane)
    {
        total += *lane;
    }
    return total;
}

// incr
template <class A, class T>
inline batch<T, A> incr(batch<T, A> const& self, requires_arch<generic>) noexcept
Expand All @@ -149,12 +164,45 @@ namespace xsimd
self, other);
}

// rotl
// Generic rotate-left: OR of `self` shifted left by `other` and `self`
// shifted right by (N - other), where N is numeric_limits<T>::digits.
// NOTE(review): numeric_limits<T>::digits does not count the sign bit, so for
// signed T N is one less than the lane width - confirm that only unsigned
// batches are expected here.
// NOTE(review): other == 0 makes the right shift count equal to N - confirm
// the batch shift operators accept that count.
template <class A, class T, class STy>
inline batch<T, A> rotl(batch<T, A> const& self, STy other, requires_arch<generic>) noexcept
{
    constexpr auto bit_count = std::numeric_limits<T>::digits;
    const auto shifted_up = self << other;
    const auto wrapped_around = self >> (bit_count - other);
    return shifted_up | wrapped_around;
}

// rotr
// Generic rotate-right: OR of `self` shifted right by `other` and `self`
// shifted left by (N - other), where N is numeric_limits<T>::digits.
// NOTE(review): numeric_limits<T>::digits does not count the sign bit, so for
// signed T N is one less than the lane width - confirm that only unsigned
// batches are expected here.
// NOTE(review): other == 0 makes the left shift count equal to N - confirm
// the batch shift operators accept that count.
template <class A, class T, class STy>
inline batch<T, A> rotr(batch<T, A> const& self, STy other, requires_arch<generic>) noexcept
{
    constexpr auto bit_count = std::numeric_limits<T>::digits;
    const auto shifted_down = self >> other;
    const auto wrapped_around = self << (bit_count - other);
    return shifted_down | wrapped_around;
}

// sadd
// Saturated add for float batches: simply forwards to add().
template <class A>
inline batch<float, A> sadd(batch<float, A> const& self, batch<float, A> const& other, requires_arch<generic>) noexcept
{
    // no saturated arithmetic on floating point numbers
    batch<float, A> sum = add(self, other);
    return sum;
}
// Saturated add for integral batches: one operand is clamped first so that
// the final addition stays inside the range of T.
template <class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value, void>::type*/>
inline batch<T, A> sadd(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept
{
    using limits = std::numeric_limits<T>;

    if (!std::is_signed<T>::value)
    {
        // Unsigned: never add more than the headroom left above self.
        const auto headroom = limits::max() - self;
        const auto increment = min(headroom, other);
        return self + increment;
    }

    // Signed: `other` shifted right by (bit width - 1) serves as the select mask.
    // Presumably all-ones on lanes where other is negative - TODO confirm the
    // shift is arithmetic for signed T.
    auto sign_fill = (other >> (8 * sizeof(T) - 1));
    // Clamp self from above (used where the mask is clear) ...
    auto clamped_from_above = min(limits::max() - other, self);
    // ... and from below (used where the mask is set).
    auto clamped_from_below = max(limits::min() - other, self);
    return other + select(batch_bool<T, A>(sign_fill.data), clamped_from_below, clamped_from_above);
}
template <class A>
inline batch<double, A> sadd(batch<double, A> const& self, batch<double, A> const& other, requires_arch<generic>) noexcept
{
Expand All @@ -167,6 +215,19 @@ namespace xsimd
{
return sub(self, other); // no saturated arithmetic on floating point numbers
}
// Saturated subtract for integral batches.
//
// Signed T: self is clamped before the subtraction so that `self - other`
// cannot leave [min, max]. `other` is never negated: the previous
// `sadd(self, -other)` form could not represent -other when other is the
// minimum of T, which made that lane saturate towards the wrong bound.
//   other >= 0 : result = max(self, min + other) - other   (guards underflow)
//   other <  0 : result = min(self, max + other) - other   (guards overflow)
// Neither `min + other` (other >= 0) nor `max + other` (other < 0) can overflow.
//
// Unsigned T: subtract at most self, so the result bottoms out at zero.
template <class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value, void>::type*/>
inline batch<T, A> ssub(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept
{
    if (std::is_signed<T>::value)
    {
        const auto other_is_negative = other < batch<T, A>(T(0));
        const auto guarded_low = max(std::numeric_limits<T>::min() + other, self);
        const auto guarded_high = min(std::numeric_limits<T>::max() + other, self);
        return select(other_is_negative, guarded_high, guarded_low) - other;
    }
    else
    {
        const auto diff = min(self, other);
        return self - diff;
    }
}
template <class A>
inline batch<double, A> ssub(batch<double, A> const& self, batch<double, A> const& other, requires_arch<generic>) noexcept
{
Expand Down
12 changes: 12 additions & 0 deletions pythran/xsimd/arch/generic/xsimd_generic_complex.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,18 @@ namespace xsimd
{
return batch_bool<T, A>(isnan(self.real()) || isnan(self.imag()));
}

// isinf for complex batches: a lane is flagged when isinf() holds for its
// real part or for its imaginary part.
template <class A, class T>
inline batch_bool<T, A> isinf(batch<std::complex<T>, A> const& self, requires_arch<generic>) noexcept
{
    const auto real_is_inf = isinf(self.real());
    const auto imag_is_inf = isinf(self.imag());
    return batch_bool<T, A>(real_is_inf || imag_is_inf);
}

// isfinite for complex batches: a lane is flagged only when isfinite() holds
// for both its real part and its imaginary part.
template <class A, class T>
inline batch_bool<T, A> isfinite(batch<std::complex<T>, A> const& self, requires_arch<generic>) noexcept
{
    const auto real_is_finite = isfinite(self.real());
    const auto imag_is_finite = isfinite(self.imag());
    return batch_bool<T, A>(real_is_finite && imag_is_finite);
}
}
}

Expand Down
34 changes: 33 additions & 1 deletion pythran/xsimd/arch/generic/xsimd_generic_details.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,9 @@ namespace xsimd
template <class T, class A>
inline batch_bool<T, A> is_odd(batch<T, A> const& self) noexcept;
template <class T, class A>
inline batch_bool<T, A> isinf(batch<T, A> const& self) noexcept;
inline typename batch<T, A>::batch_bool_type isinf(batch<T, A> const& self) noexcept;
template <class T, class A>
inline typename batch<T, A>::batch_bool_type isfinite(batch<T, A> const& self) noexcept;
template <class T, class A>
inline typename batch<T, A>::batch_bool_type isnan(batch<T, A> const& self) noexcept;
template <class T, class A>
Expand Down Expand Up @@ -178,6 +180,36 @@ namespace xsimd
{
return bitwise_cast<int64_t>(self);
}

// Provide a generic uint32_t -> float cast only if we have a
// non-generic int32_t -> float fast_cast
template <class A, class _ = decltype(fast_cast(std::declval<batch<int32_t, A> const&>(), std::declval<batch<float, A> const&>(), A {}))>
inline batch<float, A> fast_cast(batch<uint32_t, A> const& v, batch<float, A> const&, requires_arch<generic>) noexcept
{
    // see https://stackoverflow.com/questions/34066228/how-to-perform-uint32-float-conversion-with-sse
    // v is split into two 16-bit halves, each half is converted through
    // int32_t, and the halves are recombined as high * 65536 + low.
    const batch<uint32_t, A> low_half_mask(0xFFFF);
    const batch<float, A> two_pow_16(65536.0f);

    const auto low_bits = batch_cast<int32_t>(v & low_half_mask); /* 16 least significant bits of v */
    const auto high_bits = batch_cast<int32_t>(v >> 16); /* 16 most significant bits of v */
    const auto low_as_float = batch_cast<float>(low_bits); /* No rounding */
    const auto high_as_float = batch_cast<float>(high_bits); /* No rounding */
    const auto high_scaled = two_pow_16 * high_as_float; /* No rounding */
    return high_scaled + low_as_float; /* Rounding may occur here, mul and add may fuse to fma for haswell and newer */
}

// Provide a generic float -> uint32_t cast only if we have a
// non-generic float -> int32_t fast_cast
template <class A, class _ = decltype(fast_cast(std::declval<batch<float, A> const&>(), std::declval<batch<int32_t, A> const&>(), A {}))>
inline batch<uint32_t, A> fast_cast(batch<float, A> const& v, batch<uint32_t, A> const&, requires_arch<generic>) noexcept
{
    // 2^31 as a float: lanes at or above it take the rebased path below.
    const batch<float, A> two_pow_31(1u << 31);
    const auto at_or_above_two_pow_31 = v >= two_pow_31;

    // Lanes below 2^31 go straight through the float -> int32_t conversion.
    const auto direct = bitwise_cast<float>(batch_cast<int32_t>(v));

    // Other lanes: subtract 2^31 before converting, then XOR bit 31 back in.
    const auto rebased = batch_cast<int32_t>(v - two_pow_31) ^ batch<int32_t, A>(1u << 31);
    const auto wrapped = bitwise_cast<float>(rebased);

    return bitwise_cast<uint32_t>(select(at_or_above_two_pow_31, wrapped, direct));
}
}

namespace detail
Expand Down
22 changes: 12 additions & 10 deletions pythran/xsimd/arch/generic/xsimd_generic_math.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,12 +95,12 @@ namespace xsimd
// bitofsign (float): returns `self` AND-ed with a constant taken from `constants`.
// NOTE(review): the body holds two consecutive return statements (minuszero,
// then signmask); as written only the first one executes. This looks like the
// removed and the added line of a diff hunk shown together - confirm which
// constant is intended and drop the other line.
template <class A>
inline batch<float, A> bitofsign(batch<float, A> const& self, requires_arch<generic>) noexcept
{
return self & constants::minuszero<batch<float, A>>();
return self & constants::signmask<batch<float, A>>();
}
// bitofsign (double): returns `self` AND-ed with a constant taken from `constants`.
// NOTE(review): the body holds two consecutive return statements (minuszero,
// then signmask); as written only the first one executes. This looks like the
// removed and the added line of a diff hunk shown together - confirm which
// constant is intended and drop the other line.
template <class A>
inline batch<double, A> bitofsign(batch<double, A> const& self, requires_arch<generic>) noexcept
{
return self & constants::minuszero<batch<double, A>>();
return self & constants::signmask<batch<double, A>>();
}

// bitwise_cast
Expand Down Expand Up @@ -470,16 +470,18 @@ namespace xsimd
batch_type x = abs(self);
auto test0 = self < batch_type(0.);
batch_type r1(0.);
auto test1 = 3.f * x < 2.f;
batch_type z = x / (batch_type(1.) + x);
if (any(3.f * x < 2.f))
if (any(test1))
{
r1 = detail::erf_kernel<batch_type>::erfc3(z);
if (all(test1))
return select(test0, batch_type(2.) - r1, r1);
}
else
{
z -= batch_type(0.4f);
r1 = exp(-x * x) * detail::erf_kernel<batch_type>::erfc2(z);
}

z -= batch_type(0.4f);
batch_type r2 = exp(-x * x) * detail::erf_kernel<batch_type>::erfc2(z);
r1 = select(test1, r1, r2);
#ifndef XSIMD_NO_INFINITIES
r1 = select(x == constants::infinity<batch_type>(), batch_type(0.), r1);
#endif
Expand Down Expand Up @@ -1849,7 +1851,7 @@ namespace xsimd
{
using U = as_integer_t<float>;
return kernel::detail::apply_transform<U>([](float x) noexcept -> U
{ return std::lroundf(x); },
{ return std::nearbyintf(x); },
self);
}

Expand All @@ -1859,7 +1861,7 @@ namespace xsimd
{
using U = as_integer_t<double>;
return kernel::detail::apply_transform<U>([](double x) noexcept -> U
{ return std::llround(x); },
{ return std::nearbyint(x); },
self);
}

Expand Down
Loading
Loading