Skip to content

Commit

Permalink
Apply work around for SVML issues on Linux
Browse files Browse the repository at this point in the history
  • Loading branch information
Rinzii committed Sep 1, 2024
1 parent a0e64cc commit a148cc0
Show file tree
Hide file tree
Showing 4 changed files with 83 additions and 3 deletions.
22 changes: 21 additions & 1 deletion include/ccmath/internal/math/runtime/simd/func/impl/sse2/pow.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,38 @@

#ifdef CCMATH_HAS_SIMD
#ifdef CCMATH_HAS_SIMD_SSE2
#include "ccmath/internal/config/platform/linux.hpp"

#if defined(CCM_TARGET_PLATFORM_LINUX)
#include "ccmath/internal/math/generic/func/power/pow_gen.hpp"
#endif

namespace ccm::intrin
{
/**
 * @brief Computes a raised to the power b for the SSE2 single-precision simd wrapper.
 * @param a Base operand.
 * @param b Exponent operand.
 * @return A simd<float, abi::sse2> constructed from the result of the selected pow implementation.
 */
CCM_ALWAYS_INLINE simd<float, abi::sse2> pow(simd<float, abi::sse2> const & a, simd<float, abi::sse2> const & b)
{
	// _mm_pow_ps is part of SVML, which ships with Intel's DPC++ compiler.
	// It appears Windows and macOS provide SVML out of the box, so only Linux needs the workaround.
#if !defined(CCM_TARGET_PLATFORM_LINUX)
	// NOLINTNEXTLINE(modernize-return-braced-init-list)
	return simd<float, abi::sse2>(_mm_pow_ps(a.get(), b.get()));
#else
	// TODO: Replace this with a refined solution. For the time being this is temporary.
	// NOTE(review): presumably convert() yields a scalar that pow_gen consumes - confirm against simd<>.
	// NOLINTNEXTLINE(modernize-return-braced-init-list)
	return simd<float, abi::sse2>(gen::pow_gen(a.convert(), b.convert()));
#endif
}

/**
 * @brief Computes a raised to the power b for the SSE2 double-precision simd wrapper.
 * @param a Base operand.
 * @param b Exponent operand.
 * @return A simd<double, abi::sse2> constructed from the result of the selected pow implementation.
 */
CCM_ALWAYS_INLINE simd<double, abi::sse2> pow(simd<double, abi::sse2> const & a, simd<double, abi::sse2> const & b)
{
	// _mm_pow_pd is part of SVML, which ships with Intel's DPC++ compiler.
	// It appears Windows and macOS provide SVML out of the box, so only Linux needs the workaround.
#if !defined(CCM_TARGET_PLATFORM_LINUX)
	// The result of _mm_pow_pd must be wrapped in the double-precision simd type to match the return type.
	// NOLINTNEXTLINE(modernize-return-braced-init-list)
	return simd<double, abi::sse2>(_mm_pow_pd(a.get(), b.get()));
#else
	// TODO: Replace this with a refined solution. For the time being this is temporary.
	// NOTE(review): presumably convert() yields a scalar that pow_gen consumes - confirm against simd<>.
	// NOLINTNEXTLINE(modernize-return-braced-init-list)
	return simd<double, abi::sse2>(gen::pow_gen(a.convert(), b.convert()));
#endif
}
} // namespace ccm::intrin

Expand Down
24 changes: 22 additions & 2 deletions include/ccmath/internal/math/runtime/simd/func/impl/sse3/pow.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,38 @@

#ifdef CCMATH_HAS_SIMD
#ifdef CCMATH_HAS_SIMD_SSE3
#include "ccmath/internal/config/platform/linux.hpp"

#if defined(CCM_TARGET_PLATFORM_LINUX)
#include "ccmath/internal/math/generic/func/power/pow_gen.hpp"
#endif

namespace ccm::intrin
{
/**
 * @brief Computes a raised to the power b for the SSE3 single-precision simd wrapper.
 * @param a Base operand.
 * @param b Exponent operand.
 * @return A simd<float, abi::sse3> constructed from the result of the selected pow implementation.
 */
CCM_ALWAYS_INLINE simd<float, abi::sse3> pow(simd<float, abi::sse3> const & a, simd<float, abi::sse3> const & b)
{
	// _mm_pow_ps is part of SVML, which ships with Intel's DPC++ compiler.
	// It appears Windows and macOS provide SVML out of the box, so only Linux needs the workaround.
#if !defined(CCM_TARGET_PLATFORM_LINUX)
	// NOLINTNEXTLINE(modernize-return-braced-init-list)
	return simd<float, abi::sse3>(_mm_pow_ps(a.get(), b.get()));
#else
	// TODO: Replace this with a refined solution. For the time being this is temporary.
	// NOTE(review): presumably convert() yields a scalar that pow_gen consumes - confirm against simd<>.
	// NOLINTNEXTLINE(modernize-return-braced-init-list)
	return simd<float, abi::sse3>(gen::pow_gen(a.convert(), b.convert()));
#endif
}

/**
 * @brief Computes a raised to the power b for the SSE3 double-precision simd wrapper.
 * @param a Base operand.
 * @param b Exponent operand.
 * @return A simd<double, abi::sse3> constructed from the result of the selected pow implementation.
 */
CCM_ALWAYS_INLINE simd<double, abi::sse3> pow(simd<double, abi::sse3> const & a, simd<double, abi::sse3> const & b)
{
	// _mm_pow_pd is part of SVML, which ships with Intel's DPC++ compiler.
	// It appears Windows and macOS provide SVML out of the box, so only Linux needs the workaround.
#if !defined(CCM_TARGET_PLATFORM_LINUX)
	// NOLINTNEXTLINE(modernize-return-braced-init-list)
	return simd<double, abi::sse3>(_mm_pow_pd(a.get(), b.get()));
#else
	// TODO: Replace this with a refined solution. For the time being this is temporary.
	// NOTE(review): presumably convert() yields a scalar that pow_gen consumes - confirm against simd<>.
	// NOLINTNEXTLINE(modernize-return-braced-init-list)
	return simd<double, abi::sse3>(gen::pow_gen(a.convert(), b.convert()));
#endif
}
} // namespace ccm::intrin

Expand Down
20 changes: 20 additions & 0 deletions include/ccmath/internal/math/runtime/simd/func/impl/sse4/pow.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,38 @@

#ifdef CCMATH_HAS_SIMD
#ifdef CCMATH_HAS_SIMD_SSE4
#include "ccmath/internal/config/platform/linux.hpp"

#if defined(CCM_TARGET_PLATFORM_LINUX)
#include "ccmath/internal/math/generic/func/power/pow_gen.hpp"
#endif

namespace ccm::intrin
{
/**
 * @brief Computes a raised to the power b for the SSE4 single-precision simd wrapper.
 * @param a Base operand.
 * @param b Exponent operand.
 * @return A simd<float, abi::sse4> constructed from the result of the selected pow implementation.
 */
CCM_ALWAYS_INLINE simd<float, abi::sse4> pow(simd<float, abi::sse4> const & a, simd<float, abi::sse4> const & b)
{
	// _mm_pow_ps is part of SVML, which ships with Intel's DPC++ compiler.
	// It appears Windows and macOS provide SVML out of the box, so only Linux needs the workaround.
#if !defined(CCM_TARGET_PLATFORM_LINUX)
	// NOLINTNEXTLINE(modernize-return-braced-init-list)
	return simd<float, abi::sse4>(_mm_pow_ps(a.get(), b.get()));
#else
	// TODO: Replace this with a refined solution. For the time being this is temporary.
	// NOTE(review): presumably convert() yields a scalar that pow_gen consumes - confirm against simd<>.
	// NOLINTNEXTLINE(modernize-return-braced-init-list)
	return simd<float, abi::sse4>(gen::pow_gen(a.convert(), b.convert()));
#endif
}

/**
 * @brief Computes a raised to the power b for the SSE4 double-precision simd wrapper.
 * @param a Base operand.
 * @param b Exponent operand.
 * @return A simd<double, abi::sse4> constructed from the result of the selected pow implementation.
 */
CCM_ALWAYS_INLINE simd<double, abi::sse4> pow(simd<double, abi::sse4> const & a, simd<double, abi::sse4> const & b)
{
	// _mm_pow_pd is part of SVML, which ships with Intel's DPC++ compiler.
	// It appears Windows and macOS provide SVML out of the box, so only Linux needs the workaround.
#if !defined(CCM_TARGET_PLATFORM_LINUX)
	// NOLINTNEXTLINE(modernize-return-braced-init-list)
	return simd<double, abi::sse4>(_mm_pow_pd(a.get(), b.get()));
#else
	// TODO: Replace this with a refined solution. For the time being this is temporary.
	// NOTE(review): presumably convert() yields a scalar that pow_gen consumes - confirm against simd<>.
	// NOLINTNEXTLINE(modernize-return-braced-init-list)
	return simd<double, abi::sse4>(gen::pow_gen(a.convert(), b.convert()));
#endif
}
} // namespace ccm::intrin

Expand Down
20 changes: 20 additions & 0 deletions include/ccmath/internal/math/runtime/simd/func/impl/ssse3/pow.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,38 @@

#ifdef CCMATH_HAS_SIMD
#ifdef CCMATH_HAS_SIMD_SSSE3
#include "ccmath/internal/config/platform/linux.hpp"

#if defined(CCM_TARGET_PLATFORM_LINUX)
#include "ccmath/internal/math/generic/func/power/pow_gen.hpp"
#endif

namespace ccm::intrin
{
/**
 * @brief Computes a raised to the power b for the SSSE3 single-precision simd wrapper.
 * @param a Base operand.
 * @param b Exponent operand.
 * @return A simd<float, abi::ssse3> constructed from the result of the selected pow implementation.
 */
CCM_ALWAYS_INLINE simd<float, abi::ssse3> pow(simd<float, abi::ssse3> const & a, simd<float, abi::ssse3> const & b)
{
	// _mm_pow_ps is part of SVML, which ships with Intel's DPC++ compiler.
	// It appears Windows and macOS provide SVML out of the box, so only Linux needs the workaround.
#if !defined(CCM_TARGET_PLATFORM_LINUX)
	// NOLINTNEXTLINE(modernize-return-braced-init-list)
	return simd<float, abi::ssse3>(_mm_pow_ps(a.get(), b.get()));
#else
	// TODO: Replace this with a refined solution. For the time being this is temporary.
	// NOTE(review): presumably convert() yields a scalar that pow_gen consumes - confirm against simd<>.
	// NOLINTNEXTLINE(modernize-return-braced-init-list)
	return simd<float, abi::ssse3>(gen::pow_gen(a.convert(), b.convert()));
#endif
}

/**
 * @brief Computes a raised to the power b for the SSSE3 double-precision simd wrapper.
 * @param a Base operand.
 * @param b Exponent operand.
 * @return A simd<double, abi::ssse3> constructed from the result of the selected pow implementation.
 */
CCM_ALWAYS_INLINE simd<double, abi::ssse3> pow(simd<double, abi::ssse3> const & a, simd<double, abi::ssse3> const & b)
{
	// _mm_pow_pd is part of SVML, which ships with Intel's DPC++ compiler.
	// It appears Windows and macOS provide SVML out of the box, so only Linux needs the workaround.
#if !defined(CCM_TARGET_PLATFORM_LINUX)
	// NOLINTNEXTLINE(modernize-return-braced-init-list)
	return simd<double, abi::ssse3>(_mm_pow_pd(a.get(), b.get()));
#else
	// TODO: Replace this with a refined solution. For the time being this is temporary.
	// NOTE(review): presumably convert() yields a scalar that pow_gen consumes - confirm against simd<>.
	// NOLINTNEXTLINE(modernize-return-braced-init-list)
	return simd<double, abi::ssse3>(gen::pow_gen(a.convert(), b.convert()));
#endif
}
} // namespace ccm::intrin

Expand Down

0 comments on commit a148cc0

Please sign in to comment.