Skip to content

Commit

Permalink
Adjust some compiler options
Browse files Browse the repository at this point in the history
  • Loading branch information
HolyWu committed Jul 3, 2020
1 parent ebb7b1f commit fff2c99
Show file tree
Hide file tree
Showing 5 changed files with 28 additions and 3 deletions.
1 change: 1 addition & 0 deletions CAS/CAS.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
<WarningLevel>Level3</WarningLevel>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<BufferSecurityCheck>false</BufferSecurityCheck>
<FloatingPointExceptions>false</FloatingPointExceptions>
<ConformanceMode>true</ConformanceMode>
<LanguageStandard>stdcpp17</LanguageStandard>
</ClCompile>
Expand Down
8 changes: 8 additions & 0 deletions CAS/VCL2/vectorf128.h
Original file line number Diff line number Diff line change
Expand Up @@ -1300,6 +1300,14 @@ static inline Vec4f approx_recipr(Vec4f const a) {
#endif
}

// Element-wise reciprocal of a: the hardware estimate from _mm_rcp_ps is
// sharpened by a single Newton-Raphson step (23 bit precision).
//   a       : vector of values to invert
//   returns : 2*x0 - a*x0*x0, where x0 is the initial estimate
// NOTE(review): if the estimate is infinite (presumably when an element of a is
// zero) the correction term is inf * 0 -- confirm callers never pass zero.
static inline Vec4f rcp_nr(Vec4f const a) {
    Vec4f const estimate   = _mm_rcp_ps(a);            // x0: low-precision first guess
    Vec4f const correction = estimate * estimate * a;  // (x0 * x0) * a
    return (estimate + estimate) - correction;         // 2*x0 - a*x0^2
}

// approximate reciprocal square root (Faster than 1.f / sqrt(a). Relative accuracy better than 2^-11)
static inline Vec4f approx_rsqrt(Vec4f const a) {
// use more accurate version if available. (none of these will raise exceptions on zero)
Expand Down
8 changes: 8 additions & 0 deletions CAS/VCL2/vectorf256.h
Original file line number Diff line number Diff line change
Expand Up @@ -1363,6 +1363,14 @@ static inline Vec8f approx_recipr(Vec8f const a) {
#endif
}

// Element-wise reciprocal of a: the hardware estimate from _mm256_rcp_ps is
// sharpened by a single Newton-Raphson step (23 bit precision).
//   a       : vector of values to invert
//   returns : 2*x0 - a*x0*x0, where x0 is the initial estimate
// NOTE(review): if the estimate is infinite (presumably when an element of a is
// zero) the correction term is inf * 0 -- confirm callers never pass zero.
static inline Vec8f rcp_nr(Vec8f const a) {
    Vec8f const estimate   = _mm256_rcp_ps(a);         // x0: low-precision first guess
    Vec8f const correction = estimate * estimate * a;  // (x0 * x0) * a
    return (estimate + estimate) - correction;         // 2*x0 - a*x0^2
}

// approximate reciprocal square root (Faster than 1.f / sqrt(a). Relative accuracy better than 2^-11)
static inline Vec8f approx_rsqrt(Vec8f const a) {
// use more accurate version if available. (none of these will raise exceptions on zero)
Expand Down
8 changes: 8 additions & 0 deletions CAS/VCL2/vectorf512.h
Original file line number Diff line number Diff line change
Expand Up @@ -660,6 +660,14 @@ static inline Vec16f approx_recipr(Vec16f const a) {
#endif
}

// Element-wise reciprocal of a: the hardware estimate from _mm512_rcp14_ps is
// sharpened by a single Newton-Raphson step (23 bit precision).
//   a       : vector of values to invert
//   returns : 2*x0 - a*x0*x0, where x0 is the initial estimate
// NOTE(review): if the estimate is infinite (presumably when an element of a is
// zero) the correction term is inf * 0 -- confirm callers never pass zero.
static inline Vec16f rcp_nr(Vec16f const a) {
    Vec16f const estimate   = _mm512_rcp14_ps(a);       // x0: low-precision first guess
    Vec16f const correction = estimate * estimate * a;  // (x0 * x0) * a
    return (estimate + estimate) - correction;          // 2*x0 - a*x0^2
}

// approximate reciprocal square root (Faster than 1.f / sqrt(a).
// Relative accuracy better than 2^-11 without AVX512, 2^-14 with AVX512F, full precision with AVX512ER)
static inline Vec16f approx_rsqrt(Vec16f const a) {
Expand Down
6 changes: 3 additions & 3 deletions meson.build
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
project('CAS', 'cpp',
default_options: ['buildtype=release', 'b_ndebug=if-release', 'cpp_std=c++17'],
meson_version: '>=0.48.0',
version: '1'
version: '2'
)

sources = [
Expand All @@ -14,7 +14,7 @@ vapoursynth_dep = dependency('vapoursynth').partial_dependency(compile_args: tru
libs = []

if host_machine.cpu_family().startswith('x86')
add_project_arguments('-DCAS_X86', '-mfpmath=sse', '-msse2', language: 'cpp')
add_project_arguments('-fno-math-errno', '-fno-trapping-math', '-DCAS_X86', '-mfpmath=sse', '-msse2', language: 'cpp')

sources += [
'CAS/CAS_SSE2.cpp',
Expand Down Expand Up @@ -49,7 +49,7 @@ if host_machine.cpu_family().startswith('x86')

libs += static_library('avx512', 'CAS/CAS_AVX512.cpp',
dependencies: vapoursynth_dep,
cpp_args: ['-mavx512f', '-mavx512bw', '-mavx512dq', '-mavx512vl', '-mfma'],
cpp_args: ['-mavx512f', '-mavx512vl', '-mavx512bw', '-mavx512dq', '-mfma'],
gnu_symbol_visibility: 'hidden'
)
endif
Expand Down

0 comments on commit fff2c99

Please sign in to comment.