Apply work around for SVML issues on Linux

Rinzii · Sep 1, 2024 · a148cc0 · a148cc0
1 parent a0e64cc
commit a148cc0
Show file tree

Hide file tree

Showing 4 changed files with 83 additions and 3 deletions.
diff --git a/include/ccmath/internal/math/runtime/simd/func/impl/sse2/pow.hpp b/include/ccmath/internal/math/runtime/simd/func/impl/sse2/pow.hpp
@@ -14,18 +14,38 @@
 
 #ifdef CCMATH_HAS_SIMD
 	#ifdef CCMATH_HAS_SIMD_SSE2
+		#include "ccmath/internal/config/platform/linux.hpp"
+
+		#if defined(CCM_TARGET_PLATFORM_LINUX)
+			#include "ccmath/internal/math/generic/func/power/pow_gen.hpp"
+		#endif
+
 namespace ccm::intrin
 {
 	CCM_ALWAYS_INLINE simd<float, abi::sse2> pow(simd<float, abi::sse2> const & a, simd<float, abi::sse2> const & b)
 	{
+		// _mm_pow_ps is part of SVML, which is part of Intel's DPC++ compiler.
+		// Windows and macOS appear to have SVML out of the box, so only Linux needs the fallback below.
+		#if !defined(CCM_TARGET_PLATFORM_LINUX)
 		// NOLINTNEXTLINE(modernize-return-braced-init-list)
 		return simd<float, abi::sse2>(_mm_pow_ps(a.get(), b.get()));
+		#else
+		// TODO: Replace this temporary gen::pow_gen fallback with a refined solution.
+		return simd<float, abi::sse2>(gen::pow_gen(a.convert(), b.convert()));
+		#endif
 	}
 
 	CCM_ALWAYS_INLINE simd<double, abi::sse2> pow(simd<double, abi::sse2> const & a, simd<double, abi::sse2> const & b)
 	{
+		// _mm_pow_pd is part of SVML, which is part of Intel's DPC++ compiler.
+		// Windows and macOS appear to have SVML out of the box, so only Linux needs the fallback below.
+		#if !defined(CCM_TARGET_PLATFORM_LINUX)
 		// NOLINTNEXTLINE(modernize-return-braced-init-list)
 		return simd<double, abi::sse2>(_mm_pow_pd(a.get(), b.get()));
+		#else
+		// TODO: Replace this temporary gen::pow_gen fallback with a refined solution.
+		return simd<double, abi::sse2>(gen::pow_gen(a.convert(), b.convert()));
+		#endif
 	}
 } // namespace ccm::intrin
 

diff --git a/include/ccmath/internal/math/runtime/simd/func/impl/sse3/pow.hpp b/include/ccmath/internal/math/runtime/simd/func/impl/sse3/pow.hpp
@@ -14,18 +14,38 @@
 
 #ifdef CCMATH_HAS_SIMD
 	#ifdef CCMATH_HAS_SIMD_SSE3
+		#include "ccmath/internal/config/platform/linux.hpp"
+
+		#if defined(CCM_TARGET_PLATFORM_LINUX)
+			#include "ccmath/internal/math/generic/func/power/pow_gen.hpp"
+		#endif
+
 namespace ccm::intrin
 {
-	CCM_ALWAYS_INLINE simd<float, abi::sse3> pow(simd<float, abi::sse3> const & a, simd<float, abi::sse3> const & b)
+	CCM_ALWAYS_INLINE simd<float, abi::sse3> sqrt(simd<float, abi::sse3> const & a, simd<float, abi::sse3> const & b)
 	{
 		// NOLINTNEXTLINE(modernize-return-braced-init-list)
+		// _mm_pow_ps is a part of SVML which is a part of intel's DPC++ compiler
+		// It appears Windows and macOS have SVML out the box so we only care about linux.
+		#if !defined(CCM_TARGET_PLATFORM_LINUX)
 		return simd<float, abi::sse3>(_mm_pow_ps(a.get(), b.get()));
+		#else
+		// TODO: Replace this with a refined solution. For the time being this is temporary.
+		return simd<float, abi::sse3>(gen::pow_gen(a.convert(), b.convert()));
+		#endif
 	}
 
-	CCM_ALWAYS_INLINE simd<double, abi::sse3> pow(simd<double, abi::sse3> const & a, simd<double, abi::sse3> const & b)
+	CCM_ALWAYS_INLINE simd<double, abi::sse3> sqrt(simd<double, abi::sse3> const & a, simd<double, abi::sse3> const & b)
 	{
 		// NOLINTNEXTLINE(modernize-return-braced-init-list)
+		// _mm_pow_pd is a part of SVML which is a part of intel's DPC++ compiler
+		// It appears Windows and macOS have SVML out the box so we only care about linux.
+		#if !defined(CCM_TARGET_PLATFORM_LINUX)
 		return simd<double, abi::sse3>(_mm_pow_pd(a.get(), b.get()));
+		#else
+		// TODO: Replace this with a refined solution. For the time being this is temporary.
+		return simd<double, abi::sse3>(gen::pow_gen(a.convert(), b.convert()));
+		#endif
 	}
 } // namespace ccm::intrin
 

diff --git a/include/ccmath/internal/math/runtime/simd/func/impl/sse4/pow.hpp b/include/ccmath/internal/math/runtime/simd/func/impl/sse4/pow.hpp
@@ -14,18 +14,38 @@
 
 #ifdef CCMATH_HAS_SIMD
 	#ifdef CCMATH_HAS_SIMD_SSE4
+		#include "ccmath/internal/config/platform/linux.hpp"
+
+		#if defined(CCM_TARGET_PLATFORM_LINUX)
+			#include "ccmath/internal/math/generic/func/power/pow_gen.hpp"
+		#endif
+
 namespace ccm::intrin
 {
 	CCM_ALWAYS_INLINE simd<float, abi::sse4> pow(simd<float, abi::sse4> const & a, simd<float, abi::sse4> const & b)
 	{
+		// _mm_pow_ps is part of SVML, which is part of Intel's DPC++ compiler.
+		// Windows and macOS appear to have SVML out of the box, so only Linux needs the fallback below.
+		#if !defined(CCM_TARGET_PLATFORM_LINUX)
 		// NOLINTNEXTLINE(modernize-return-braced-init-list)
 		return simd<float, abi::sse4>(_mm_pow_ps(a.get(), b.get()));
+		#else
+		// TODO: Replace this temporary gen::pow_gen fallback with a refined solution.
+		return simd<float, abi::sse4>(gen::pow_gen(a.convert(), b.convert()));
+		#endif
 	}
 
 	CCM_ALWAYS_INLINE simd<double, abi::sse4> pow(simd<double, abi::sse4> const & a, simd<double, abi::sse4> const & b)
 	{
+		// _mm_pow_pd is part of SVML, which is part of Intel's DPC++ compiler.
+		// Windows and macOS appear to have SVML out of the box, so only Linux needs the fallback below.
+		#if !defined(CCM_TARGET_PLATFORM_LINUX)
 		// NOLINTNEXTLINE(modernize-return-braced-init-list)
 		return simd<double, abi::sse4>(_mm_pow_pd(a.get(), b.get()));
+		#else
+		// TODO: Replace this temporary gen::pow_gen fallback with a refined solution.
+		return simd<double, abi::sse4>(gen::pow_gen(a.convert(), b.convert()));
+		#endif
 	}
 } // namespace ccm::intrin
 

diff --git a/include/ccmath/internal/math/runtime/simd/func/impl/ssse3/pow.hpp b/include/ccmath/internal/math/runtime/simd/func/impl/ssse3/pow.hpp
@@ -14,18 +14,38 @@
 
 #ifdef CCMATH_HAS_SIMD
 	#ifdef CCMATH_HAS_SIMD_SSSE3
+		#include "ccmath/internal/config/platform/linux.hpp"
+
+		#if defined(CCM_TARGET_PLATFORM_LINUX)
+			#include "ccmath/internal/math/generic/func/power/pow_gen.hpp"
+		#endif
+
 namespace ccm::intrin
 {
 	CCM_ALWAYS_INLINE simd<float, abi::ssse3> pow(simd<float, abi::ssse3> const & a, simd<float, abi::ssse3> const & b)
 	{
+		// _mm_pow_ps is part of SVML, which is part of Intel's DPC++ compiler.
+		// Windows and macOS appear to have SVML out of the box, so only Linux needs the fallback below.
+		#if !defined(CCM_TARGET_PLATFORM_LINUX)
 		// NOLINTNEXTLINE(modernize-return-braced-init-list)
 		return simd<float, abi::ssse3>(_mm_pow_ps(a.get(), b.get()));
+		#else
+		// TODO: Replace this temporary gen::pow_gen fallback with a refined solution.
+		return simd<float, abi::ssse3>(gen::pow_gen(a.convert(), b.convert()));
+		#endif
 	}
 
 	CCM_ALWAYS_INLINE simd<double, abi::ssse3> pow(simd<double, abi::ssse3> const & a, simd<double, abi::ssse3> const & b)
 	{
+		// _mm_pow_pd is part of SVML, which is part of Intel's DPC++ compiler.
+		// Windows and macOS appear to have SVML out of the box, so only Linux needs the fallback below.
+		#if !defined(CCM_TARGET_PLATFORM_LINUX)
 		// NOLINTNEXTLINE(modernize-return-braced-init-list)
 		return simd<double, abi::ssse3>(_mm_pow_pd(a.get(), b.get()));
+		#else
+		// TODO: Replace this temporary gen::pow_gen fallback with a refined solution.
+		return simd<double, abi::ssse3>(gen::pow_gen(a.convert(), b.convert()));
+		#endif
 	}
 } // namespace ccm::intrin