From f513b203f6a19d05a0be8a37ad519c0ebac6af9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henry=20Linjam=C3=A4ki?= Date: Thu, 23 Nov 2023 14:52:08 +0200 Subject: [PATCH] Add mapping to OpenCL native_* on -ffast-math Map HIP device builtin functions to corresponding OpenCL native built-ins on -ffast-math. The rationale is based on `-ffast-math` specification [1] which gives permission to approximate transcendental functions (-fapprox-func). [1] https://clang.llvm.org/docs/UsersManual.html#cmdoption-ffast-math --- .../hip/devicelib/double_precision/dp_math.hh | 91 ++++++++++++++- .../hip/devicelib/single_precision/sp_math.hh | 107 +++++++++++++++--- tests/compiler/CMakeLists.txt | 4 + tests/compiler/TestFastMath.hip | 4 + 4 files changed, 187 insertions(+), 19 deletions(-) create mode 100644 tests/compiler/TestFastMath.hip diff --git a/include/hip/devicelib/double_precision/dp_math.hh b/include/hip/devicelib/double_precision/dp_math.hh index db02b4ec2..111b6ae2f 100644 --- a/include/hip/devicelib/double_precision/dp_math.hh +++ b/include/hip/devicelib/double_precision/dp_math.hh @@ -38,7 +38,14 @@ extern "C++" __device__ double atanh(double x); // OpenCL extern "C++" __device__ double cbrt(double x); // OpenCL extern "C++" __device__ double ceil(double x); // OpenCL extern "C++" __device__ double copysign(double x, double y); // OpenCL + +#ifdef __FAST_MATH__ +extern "C++" __device__ double native_cos(double x); // OpenCL +extern "C++" inline __device__ double cos(double x) { return ::native_cos(x); } +#else extern "C++" __device__ double cos(double x); // OpenCL +#endif + extern "C++" __device__ double cosh(double x); // OpenCL extern "C++" __device__ double cospi(double x); // OpenCL @@ -69,9 +76,31 @@ extern "C++" inline __device__ double erfinv(double x) { return ::__ocml_erfinv_f64(x); } +#ifdef __FAST_MATH__ +extern "C++" __device__ double native_exp(double x); // OpenCL +extern "C++" inline __device__ double exp(double x) { return ::native_exp(x); } +#else extern "C++" __device__ double exp(double x); // OpenCL +#endif + +#ifdef __FAST_MATH__ +extern "C++" __device__ double native_exp10(double x); // OpenCL +extern "C++" inline __device__ double exp10(double x) { + return ::native_exp10(x); +} +#else extern "C++" __device__ double exp10(double x); // OpenCL -extern "C++" __device__ double exp2(double x); // OpenCL +#endif + +#ifdef __FAST_MATH__ +extern "C++" __device__ double native_exp2(double x); // OpenCL +extern "C++" inline __device__ double exp2(double x) { + return ::native_exp2(x); +} +#else +extern "C++" __device__ double exp2(double x); // OpenCL +#endif + extern "C++" __device__ double expm1(double x); // OpenCL extern "C++" __device__ double fabs(double x); // OpenCL extern "C++" __device__ double fdim(double x, double y); // OpenCL @@ -116,10 +145,33 @@ extern "C++" inline __device__ long long int llround(double x) { return ::__chip_llround_f64(x); } -extern "C++" __device__ double log(double x); // OpenCL +#ifdef __FAST_MATH__ +extern "C++" __device__ double native_log(double x); // OpenCL +extern "C++" inline __device__ double log(double x) { return ::native_log(x); } +#else +extern "C++" __device__ double log(double x); // OpenCL +#endif + +#ifdef __FAST_MATH__ +extern "C++" __device__ double native_log10(double x); // OpenCL +extern "C++" inline __device__ double log10(double x) { + return ::native_log10(x); +} +#else extern "C++" __device__ double log10(double x); // OpenCL +#endif + extern "C++" __device__ double log1p(double x); // OpenCL -extern "C++" __device__ double log2(double x); // OpenCL + +#ifdef __FAST_MATH__ +extern "C++" __device__ double native_log2(double x); // OpenCL +extern "C++" inline __device__ double log2(double x) { + return ::native_log2(x); +} +#else +extern "C++" __device__ double log2(double x); // OpenCL +#endif + extern "C++" __device__ double logb(double x); // OpenCL extern "C" __device__ long int __chip_lrint_f64(double x); // Custom @@ -214,7 +266,15 @@ extern "C++" inline __device__ double rnorm4d(double a, double b, double c, } extern "C++" __device__ double round(double x); // OpenCL + +#ifdef __FAST_MATH__ +extern "C++" __device__ double native_rsqrt(double x); // OpenCL +extern "C++" inline __device__ double rsqrt(double x) { + return ::native_rsqrt(x); +} +#else extern "C++" __device__ double rsqrt(double x); // OpenCL +#endif extern "C" __device__ double __ocml_scalb_f64(double x, double n); extern "C++" inline __device__ double scalbln(double x, long int n) { @@ -229,7 +289,15 @@ extern "C++" inline __device__ double scalbn(double x, int n) { } extern "C++" __device__ int signbit ( double a ); // OpenCL + +#ifdef __FAST_MATH__ +extern "C++" __device__ double native_sin(double x); // OpenCL +extern "C++" inline __device__ double sin(double x) { + return ::native_sin(x); +} +#else extern "C++" __device__ double sin(double x); // OpenCL +#endif extern "C++" __device__ double sincos(double x, double *sptr); // OpenCL extern "C++" inline __device__ void sincos(double x, double *sptr, @@ -248,8 +316,23 @@ extern "C++" inline __device__ void sincospi(double x, double *sptr, extern "C++" __device__ double sinh(double x); // OpenCL extern "C++" __device__ double sinpi(double x); // OpenCL + +#ifdef __FAST_MATH__ +extern "C++" __device__ double native_sqrt(double x); // OpenCL +extern "C++" inline __device__ double sqrt(double x) { + return ::native_sqrt(x); +} +#else extern "C++" __device__ double sqrt(double x); // OpenCL -extern "C++" __device__ double tan(double x); // OpenCL +#endif + +#ifdef __FAST_MATH__ +extern "C++" __device__ double native_tan(double x); // OpenCL +extern "C++" inline __device__ double tan(double x) { return ::native_tan(x); } +#else +extern "C++" __device__ double tan(double x); // OpenCL +#endif + extern "C++" __device__ double tanh(double x); // OpenCL extern "C++" __device__ double tgamma(double x); // OpenCL extern "C++" __device__ double trunc(double x); // OpenCL diff --git a/include/hip/devicelib/single_precision/sp_math.hh b/include/hip/devicelib/single_precision/sp_math.hh index 01bd79508..0883867b9 100644 --- a/include/hip/devicelib/single_precision/sp_math.hh +++ b/include/hip/devicelib/single_precision/sp_math.hh @@ -72,7 +72,14 @@ extern "C++" inline __device__ float copysignf(float x, float y) { } extern "C++" __device__ float cos(float x); // OpenCL -extern "C++" inline __device__ float cosf(float x) { return ::cos(x); } +extern "C++" __device__ float native_cos(float x); // OpenCL +extern "C++" inline __device__ float cosf(float x) { +#ifdef __FAST_MATH__ + return ::native_cos(x); +#else + return ::cos(x); +#endif +} extern "C++" __device__ float cosh(float x); // OpenCL extern "C++" inline __device__ float coshf(float x) { return ::cosh(x); } @@ -102,13 +109,34 @@ extern "C" __device__ float __ocml_erfinv_f32(float x); // OCML extern "C++" inline __device__ float erfinvf(float x) { return ::__ocml_erfinv_f32(x); } extern "C++" __device__ float exp10(float x); // OpenCL -extern "C++" inline __device__ float exp10f(float x) { return ::exp10(x); } +extern "C++" __device__ float native_exp10(float x); // OpenCL +extern "C++" inline __device__ float exp10f(float x) { +#ifdef __FAST_MATH__ + return ::native_exp10(x); +#else + return ::exp10(x); +#endif +} extern "C++" __device__ float exp2(float x); // OpenCL -extern "C++" inline __device__ float exp2f(float x) { return ::exp2(x); } +extern "C++" __device__ float native_exp2(float x); // OpenCL +extern "C++" inline __device__ float exp2f(float x) { +#ifdef __FAST_MATH__ + return ::native_exp2(x); +#else + return ::exp2(x); +#endif +} extern "C++" __device__ float exp(float x); // OpenCL -extern "C++" inline __device__ float expf(float x) { return ::exp(x); } +extern "C++" __device__ float native_exp(float x); // OpenCL +extern "C++" inline __device__ float expf(float x) { +#ifdef __FAST_MATH__ + return ::native_exp(x); +#else + return ::exp(x); +#endif +} extern "C++" __device__ float expm1(float x); // OpenCL extern "C++" inline __device__ float expm1f(float x) { return ::expm1(x); } @@ -121,12 +149,12 @@ extern "C++" inline __device__ float fdimf(float x, float y) { return ::fdim(x, y); } -// extern "C++" __device__ float native_divide(float x, float y); // OpenCL +extern "C++" __device__ float native_divide(float x, float y); // OpenCL extern "C++" inline __device__ float fdividef(float x, float y) { -#ifdef CHIP_FAST_MATH // TODO check if this is correct +#ifdef __FAST_MATH__ return native_divide(x, y); #else - return x / y; + return x / y; #endif } @@ -206,19 +234,40 @@ extern "C++" inline __device__ long long int llroundf(float x) { } extern "C++" __device__ float log10(float x); // OpenCL -extern "C++" inline __device__ float log10f(float x) { return ::log10(x); } +extern "C++" __device__ float native_log10(float x); // OpenCL +extern "C++" inline __device__ float log10f(float x) { +#ifdef __FAST_MATH__ + return ::native_log10(x); +#else + return ::log10(x); +#endif +} extern "C++" __device__ float log1p(float x); // OpenCL extern "C++" inline __device__ float log1pf(float x) { return ::log1p(x); } extern "C++" __device__ float log2(float x); // OpenCL -extern "C++" inline __device__ float log2f(float x) { return ::log2(x); } +extern "C++" __device__ float native_log2(float x); // OpenCL +extern "C++" inline __device__ float log2f(float x) { +#ifdef __FAST_MATH__ + return ::native_log2(x); +#else + return ::log2(x); +#endif +} extern "C++" __device__ float logb(float x); // OpenCL extern "C++" inline __device__ float logbf(float x) { return ::logb(x); } extern "C++" __device__ float log(float x); // OpenCL -extern "C++" inline __device__ float logf(float x) { return ::log(x); } +extern "C++" __device__ float native_log(float x); // OpenCL +extern "C++" inline __device__ float logf(float x) { +#ifdef __FAST_MATH__ + return ::native_log(x); +#else + return ::log(x); +#endif +} extern "C" __device__ long int __chip_lrint_f32(float x); // Custom extern "C++" inline __device__ long int lrintf(float x) { @@ -356,8 +405,15 @@ extern "C++" inline __device__ float roundf(float x) { return static_cast(::round(x)); } -extern "C++" __device__ float rsqrt(float x); // OpenCL -extern "C++" inline __device__ float rsqrtf(float x) { return ::rsqrt(x); } +extern "C++" __device__ float rsqrt(float x); // OpenCL +extern "C++" __device__ float native_rsqrt(float x); // OpenCL +extern "C++" inline __device__ float rsqrtf(float x) { +#ifdef __FAST_MATH__ + return ::native_rsqrt(x); +#else + return ::rsqrt(x); +#endif +} extern "C" __device__ float __ocml_scalbn_f32(float x, int n); // OCML extern "C++" inline __device__ float scalbnf(float x, int n) { @@ -385,7 +441,14 @@ extern "C++" inline __device__ void sincospif(float x, float *sptr, } extern "C++" __device__ float sin(float x); // OpenCL -extern "C++" inline __device__ float sinf(float x) { return ::sin(x); } +extern "C++" __device__ float native_sin(float x); // OpenCL +extern "C++" inline __device__ float sinf(float x) { +#ifdef __FAST_MATH__ + return ::native_sin(x); +#else + return ::sin(x); +#endif +} extern "C++" __device__ float sinh(float x); // OpenCL extern "C++" inline __device__ float sinhf(float x) { return ::sinh(x); } @@ -394,10 +457,24 @@ extern "C++" __device__ float sinpi(float x); // OpenCL extern "C++" inline __device__ float sinpif(float x) { return ::sinpi(x); } extern "C++" __device__ float sqrt(float x); // OpenCL -extern "C++" inline __device__ float sqrtf(float x) { return ::sqrt(x); } +extern "C++" __device__ float native_sqrt(float x); // OpenCL +extern "C++" inline __device__ float sqrtf(float x) { +#ifdef __FAST_MATH__ + return ::native_sqrt(x); +#else + return ::sqrt(x); +#endif +} extern "C++" __device__ float tan(float x); // OpenCL -extern "C++" inline __device__ float tanf(float x) { return ::tan(x); } +extern "C++" __device__ float native_tan(float x); // OpenCL +extern "C++" inline __device__ float tanf(float x) { +#ifdef __FAST_MATH__ + return ::native_tan(x); +#else + return ::tan(x); +#endif +} extern "C++" __device__ float tanh(float x); // OpenCL extern "C++" inline __device__ float tanhf(float x) { return ::tanh(x); } diff --git a/tests/compiler/CMakeLists.txt b/tests/compiler/CMakeLists.txt index de2499412..bb5c07fc8 100644 --- a/tests/compiler/CMakeLists.txt +++ b/tests/compiler/CMakeLists.txt @@ -113,3 +113,7 @@ add_hipcc_test(TestLdg.hip HIPCC_OPTIONS -fsyntax-only) add_hipcc_test(TestSwitchCase.hip HIPCC_OPTIONS -O1 -c) add_hipcc_test(TestHostSideHIPVectors.hip HIPCC_OPTIONS -fsyntax-only) add_hipcc_test(TestAlignAttr.hip HIPCC_OPTIONS -fsyntax-only) + +# Check __FAST_MATH__ is set for -ffast-math and preprocessor guards +# using it are not hiding errors. +add_hipcc_test(TestFastMath.hip HIPCC_OPTIONS -fsyntax-only -ffast-math) diff --git a/tests/compiler/TestFastMath.hip b/tests/compiler/TestFastMath.hip new file mode 100644 index 000000000..4268a06fd --- /dev/null +++ b/tests/compiler/TestFastMath.hip @@ -0,0 +1,4 @@ +#include +#ifndef __FAST_MATH__ +#error "__FAST_MATH__ macro is not defined with -ffast-math" +#endif