Skip to content

Commit

Permalink
Review intrinsics includes and macros on gf256
Browse files Browse the repository at this point in the history
- Define the LINUX_ARM macro automatically based on ARM flags set by the
  compiler.
- Detect ARM Neon based on compiler flags. Take both __ARM_NEON and
  __ARM_NEON__ into account.
- Fix the condition for inclusion of AVX2, as discussed in Issue catid#31.
- Reorganize some macro definitions for better readability. For example,
  use a single standalone #if/#elif/#else chain to define GF256_M128.
- Raise a compile-time error (#error) if SSE2 is not available when building
  for a non-mobile target.
- Remove the unused GF256_ALIGNED_ACCESSES macro.
  • Loading branch information
igorauad committed Apr 15, 2021
1 parent e468308 commit b389e93
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 30 deletions.
3 changes: 0 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,6 @@ else()
set(CMAKE_CXX_FLAGS "-Wall -Wextra")
set(CMAKE_CXX_FLAGS_DEBUG "-g -O0 -march=native")
set(CMAKE_CXX_FLAGS_RELEASE "-O3 -march=native")
if (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
add_definitions(-DLINUX_ARM=1)
endif()
endif()

add_library(wirehair ${LIB_SOURCE_FILES})
Expand Down
2 changes: 1 addition & 1 deletion gf256.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ static bool gf256_self_test()
#endif

#if defined(GF256_TRY_NEON)
# if defined(IOS) && defined(__ARM_NEON__)
# if defined(IOS) && (defined(__ARM_NEON) || defined(__ARM_NEON__))
// Requires iPhone 5S or newer
static const bool CpuHasNeon = true;
static const bool CpuHasNeon64 = true;
Expand Down
48 changes: 22 additions & 26 deletions gf256.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,50 +53,46 @@
//------------------------------------------------------------------------------
// Platform/Architecture

#if defined(__ARM_ARCH) || defined(__ARM_NEON) || defined(__ARM_NEON__)
#define LINUX_ARM
#endif

#if defined(ANDROID) || defined(IOS) || defined(LINUX_ARM) || defined(__powerpc__) || defined(__s390__)
#define GF256_TARGET_MOBILE
#endif // ANDROID

#if defined(__AVX2__) || (defined (_MSC_VER) && _MSC_VER >= 1900) && !defined(GF256_TARGET_MOBILE)
#if defined(__AVX2__) && (!defined (_MSC_VER) || _MSC_VER >= 1900)
#define GF256_TRY_AVX2 /* 256-bit */
#include <immintrin.h>
#define GF256_ALIGN_BYTES 32
#define GF256_M256 __m256i
#else // __AVX2__
#define GF256_ALIGN_BYTES 16
#endif // __AVX2__

#if !defined(GF256_TARGET_MOBILE)
// Note: MSVC currently only supports SSSE3 but not AVX2
#include <tmmintrin.h> // SSSE3: _mm_shuffle_epi8
#if defined(__SSE2__)
#include <emmintrin.h> // SSE2
#endif // GF256_TARGET_MOBILE
#elif !defined(GF256_TARGET_MOBILE)
#error "SSE2 is required for non-mobile target"
#endif // __SSE2__

#if defined(HAVE_ARM_NEON_H)
#include <arm_neon.h>
#endif // HAVE_ARM_NEON_H

#if defined(GF256_TARGET_MOBILE)
#if defined(__SSE3__)
#define GF256_TRY_SSE3
#include <tmmintrin.h> // SSSE3: _mm_shuffle_epi8
#endif // __SSE3__

#define GF256_ALIGNED_ACCESSES /* Inputs must be aligned to GF256_ALIGN_BYTES */
#if defined(__ARM_NEON) || defined(__ARM_NEON__)
#include <arm_neon.h>
#define GF256_TRY_NEON
#endif // __ARM_NEON__

# if defined(HAVE_ARM_NEON_H)
// Compiler-specific 128-bit SIMD register keyword
// Compiler-specific 128-bit SIMD register keyword
#if defined(GF256_TARGET_MOBILE) && defined(GF256_TRY_NEON)
#define GF256_M128 uint8x16_t
#define GF256_TRY_NEON
#elif defined(__SSE2__) || defined(GF256_TRY_SSE3)
#define GF256_M128 __m128i
#else
#define GF256_M128 uint64_t
# endif

#else // GF256_TARGET_MOBILE

// Compiler-specific 128-bit SIMD register keyword
#define GF256_M128 __m128i

#endif // GF256_TARGET_MOBILE

#ifdef GF256_TRY_AVX2
// Compiler-specific 256-bit SIMD register keyword
#define GF256_M256 __m256i
#endif

// Compiler-specific C++11 restrict keyword
Expand Down

0 comments on commit b389e93

Please sign in to comment.