Adjust some compiler options

HomeOfVapourSynthEvolution · Jul 3, 2020 · fff2c99 · fff2c99
1 parent ebb7b1f
commit fff2c99
Show file tree

Hide file tree

Showing 5 changed files with 28 additions and 3 deletions.
diff --git a/CAS/CAS.vcxproj b/CAS/CAS.vcxproj
@@ -39,6 +39,7 @@
       <WarningLevel>Level3</WarningLevel>
       <MultiProcessorCompilation>true</MultiProcessorCompilation>
       <BufferSecurityCheck>false</BufferSecurityCheck>
+      <FloatingPointExceptions>false</FloatingPointExceptions>
       <ConformanceMode>true</ConformanceMode>
       <LanguageStandard>stdcpp17</LanguageStandard>
     </ClCompile>

diff --git a/CAS/VCL2/vectorf128.h b/CAS/VCL2/vectorf128.h
@@ -1300,6 +1300,14 @@ static inline Vec4f approx_recipr(Vec4f const a) {
 #endif
 }
 
+// Approximate reciprocal refined by one Newton-Raphson step: x1 = 2*x0 - a*x0*x0 with x0 = _mm_rcp_ps(a) (about 23 bit precision). NOTE(review): a = 0 or infinity presumably yields NaN (inf*inf*0) rather than infinity/zero - confirm callers never pass these.
+static inline Vec4f rcp_nr(Vec4f const a) {
+    Vec4f nr = _mm_rcp_ps(a);
+    Vec4f muls = nr * nr * a;
+    Vec4f dbl = nr + nr;
+    return dbl - muls;
+}
+
 // approximate reciprocal squareroot (Faster than 1.f / sqrt(a). Relative accuracy better than 2^-11)
 static inline Vec4f approx_rsqrt(Vec4f const a) {
     // use more accurate version if available. (none of these will raise exceptions on zero)

diff --git a/CAS/VCL2/vectorf256.h b/CAS/VCL2/vectorf256.h
@@ -1363,6 +1363,14 @@ static inline Vec8f approx_recipr(Vec8f const a) {
 #endif
 }
 
+// Approximate reciprocal refined by one Newton-Raphson step: x1 = 2*x0 - a*x0*x0 with x0 = _mm256_rcp_ps(a) (about 23 bit precision). NOTE(review): a = 0 or infinity presumably yields NaN (inf*inf*0) rather than infinity/zero - confirm callers never pass these.
+static inline Vec8f rcp_nr(Vec8f const a) {
+    Vec8f nr = _mm256_rcp_ps(a);
+    Vec8f muls = nr * nr * a;
+    Vec8f dbl = nr + nr;
+    return dbl - muls;
+}
+
 // approximate reciprocal squareroot (Faster than 1.f / sqrt(a). Relative accuracy better than 2^-11)
 static inline Vec8f approx_rsqrt(Vec8f const a) {
 // use more accurate version if available. (none of these will raise exceptions on zero)

diff --git a/CAS/VCL2/vectorf512.h b/CAS/VCL2/vectorf512.h
@@ -660,6 +660,14 @@ static inline Vec16f approx_recipr(Vec16f const a) {
 #endif
 }
 
+// Approximate reciprocal refined by one Newton-Raphson step: x1 = 2*x0 - a*x0*x0 with x0 = _mm512_rcp14_ps(a) (about 23 bit precision). NOTE(review): a = 0 or infinity presumably yields NaN (inf*inf*0) rather than infinity/zero - confirm callers never pass these.
+static inline Vec16f rcp_nr(Vec16f const a) {
+    Vec16f nr = _mm512_rcp14_ps(a);
+    Vec16f muls = nr * nr * a;
+    Vec16f dbl = nr + nr;
+    return dbl - muls;
+}
+
 // approximate reciprocal squareroot (Faster than 1.f / sqrt(a).
 // Relative accuracy better than 2^-11 without AVX512, 2^-14 with AVX512F, full precision with AVX512ER)
 static inline Vec16f approx_rsqrt(Vec16f const a) {

diff --git a/meson.build b/meson.build
@@ -1,7 +1,7 @@
 project('CAS', 'cpp',
   default_options: ['buildtype=release', 'b_ndebug=if-release', 'cpp_std=c++17'],
   meson_version: '>=0.48.0',
-  version: '1'
+  version: '2'
 )
 
 sources = [
@@ -14,7 +14,7 @@ vapoursynth_dep = dependency('vapoursynth').partial_dependency(compile_args: tru
 libs = []
 
 if host_machine.cpu_family().startswith('x86')
-  add_project_arguments('-DCAS_X86', '-mfpmath=sse', '-msse2', language: 'cpp')
+  add_project_arguments('-fno-math-errno', '-fno-trapping-math', '-DCAS_X86', '-mfpmath=sse', '-msse2', language: 'cpp')
 
   sources += [
     'CAS/CAS_SSE2.cpp',
@@ -49,7 +49,7 @@ if host_machine.cpu_family().startswith('x86')
 
   libs += static_library('avx512', 'CAS/CAS_AVX512.cpp',
     dependencies: vapoursynth_dep,
-    cpp_args: ['-mavx512f', '-mavx512bw', '-mavx512dq', '-mavx512vl', '-mfma'],
+    cpp_args: ['-mavx512f', '-mavx512vl', '-mavx512bw', '-mavx512dq', '-mfma'],
     gnu_symbol_visibility: 'hidden'
   )
 endif