From b389e93e4f3f0de53ded778c62ef14c223e0621d Mon Sep 17 00:00:00 2001
From: Igor Freire
Date: Thu, 15 Apr 2021 15:04:04 -0300
Subject: [PATCH] Review intrinsics includes and macros on gf256

- Define the LINUX_ARM macro automatically based on ARM flags set by the
  compiler.
- Detect ARM Neon based on compiler flags. Take both __ARM_NEON and
  __ARM_NEON__ into account.
- Fix the condition for inclusion of AVX2, as discussed in Issue #31.
- Reorganize some macro definitions for better readability. For example,
  use an isolated if-elif-else conditional to define GF256_M128.
- Throw error if SSE2 is not available when building for a non-mobile
  target.
- Remove the unused GF256_ALIGNED_ACCESSES macro.
---
 CMakeLists.txt |  3 ---
 gf256.cpp      |  2 +-
 gf256.h        | 48 ++++++++++++++++++++++--------------------------
 3 files changed, 23 insertions(+), 30 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7852b3f..a0e6a16 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -59,9 +59,6 @@ else()
     set(CMAKE_CXX_FLAGS "-Wall -Wextra")
     set(CMAKE_CXX_FLAGS_DEBUG "-g -O0 -march=native")
     set(CMAKE_CXX_FLAGS_RELEASE "-O3 -march=native")
-    if (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
-        add_definitions(-DLINUX_ARM=1)
-    endif()
 endif()
 
 add_library(wirehair ${LIB_SOURCE_FILES})
diff --git a/gf256.cpp b/gf256.cpp
index 510146b..66aa9f0 100644
--- a/gf256.cpp
+++ b/gf256.cpp
@@ -200,7 +200,7 @@ static bool gf256_self_test()
 #endif
 
 #if defined(GF256_TRY_NEON)
-# if defined(IOS) && defined(__ARM_NEON__)
+# if defined(IOS) && (defined(__ARM_NEON) || defined(__ARM_NEON__))
     // Requires iPhone 5S or newer
     static const bool CpuHasNeon = true;
     static const bool CpuHasNeon64 = true;
diff --git a/gf256.h b/gf256.h
index 71134d2..3eee8b8 100644
--- a/gf256.h
+++ b/gf256.h
@@ -53,50 +53,46 @@
 //------------------------------------------------------------------------------
 // Platform/Architecture
 
+#if defined(__ARM_ARCH) || defined(__ARM_NEON) || defined(__ARM_NEON__)
+    #define LINUX_ARM
+#endif
+
 #if defined(ANDROID) || defined(IOS) || defined(LINUX_ARM) || defined(__powerpc__) || defined(__s390__)
     #define GF256_TARGET_MOBILE
 #endif // ANDROID
 
-#if defined(__AVX2__) || (defined (_MSC_VER) && _MSC_VER >= 1900) && !defined(GF256_TARGET_MOBILE)
+#if defined(__AVX2__) && (!defined (_MSC_VER) || _MSC_VER >= 1900)
     #define GF256_TRY_AVX2 /* 256-bit */
     #include <immintrin.h>
     #define GF256_ALIGN_BYTES 32
+    #define GF256_M256 __m256i
 #else // __AVX2__
     #define GF256_ALIGN_BYTES 16
 #endif // __AVX2__
 
-#if !defined(GF256_TARGET_MOBILE)
-    // Note: MSVC currently only supports SSSE3 but not AVX2
-    #include <tmmintrin.h> // SSSE3: _mm_shuffle_epi8
+#if defined(__SSE2__)
     #include <emmintrin.h> // SSE2
-#endif // GF256_TARGET_MOBILE
+#elif !defined(GF256_TARGET_MOBILE)
+    #error "SSE2 is required for non-mobile target"
+#endif // __SSE2__
 
-#if defined(HAVE_ARM_NEON_H)
-    #include <arm_neon.h>
-#endif // HAVE_ARM_NEON_H
-
-#if defined(GF256_TARGET_MOBILE)
+#if defined(__SSE3__)
+    #define GF256_TRY_SSE3
+    #include <tmmintrin.h> // SSSE3: _mm_shuffle_epi8
+#endif // __SSE3__
 
-    #define GF256_ALIGNED_ACCESSES /* Inputs must be aligned to GF256_ALIGN_BYTES */
+#if defined(__ARM_NEON) || defined(__ARM_NEON__)
+    #include <arm_neon.h>
+    #define GF256_TRY_NEON
+#endif // __ARM_NEON__
 
-# if defined(HAVE_ARM_NEON_H)
-    // Compiler-specific 128-bit SIMD register keyword
+// Compiler-specific 128-bit SIMD register keyword
+#if defined(GF256_TARGET_MOBILE) && defined(GF256_TRY_NEON)
     #define GF256_M128 uint8x16_t
-    #define GF256_TRY_NEON
+#elif defined(__SSE2__) || defined(GF256_TRY_SSE3)
+    #define GF256_M128 __m128i
 #else
     #define GF256_M128 uint64_t
-# endif
-
-#else // GF256_TARGET_MOBILE
-
-    // Compiler-specific 128-bit SIMD register keyword
-    #define GF256_M128 __m128i
-
-#endif // GF256_TARGET_MOBILE
-
-#ifdef GF256_TRY_AVX2
-    // Compiler-specific 256-bit SIMD register keyword
-    #define GF256_M256 __m256i
 #endif
 
 // Compiler-specific C++11 restrict keyword