From 0a422f738b94daaf97830484f3f1440f1ab0d87a Mon Sep 17 00:00:00 2001 From: Jimmy Lu Date: Fri, 15 Nov 2024 14:37:39 -0800 Subject: [PATCH] fix: Remove same page hack for simdStrstr (#11553) Summary: Pull Request resolved: https://github.com/facebookincubator/velox/pull/11553 The hack is breaking ASAN check, removing it does not give huge performance loss though so we remove it. Benchmark results before vs after change: https://gist.github.com/Yuhta/b3f66e85d52b62240c09c15d6e7941bb Reviewed By: kevinwilfong Differential Revision: D66012678 fbshipit-source-id: 107ae9e715ff5edd20707fb49b970ae5d3190559 --- velox/common/base/SimdUtil-inl.h | 41 ++++++++++---------------------- velox/common/base/SimdUtil.cpp | 1 - 2 files changed, 12 insertions(+), 30 deletions(-) diff --git a/velox/common/base/SimdUtil-inl.h b/velox/common/base/SimdUtil-inl.h index be88b940e3a7..5e3009e29dae 100644 --- a/velox/common/base/SimdUtil-inl.h +++ b/velox/common/base/SimdUtil-inl.h @@ -1451,19 +1451,12 @@ using CharVector = xsimd::batch; #define VELOX_SIMD_STRSTR 0 #endif -extern const int kPageSize; - #if VELOX_SIMD_STRSTR -FOLLY_ALWAYS_INLINE bool pageSafe(const void* const ptr) { - return ((kPageSize - 1) & reinterpret_cast(ptr)) <= - kPageSize - CharVector::size; -} - -template +template size_t FOLLY_ALWAYS_INLINE smidStrstrMemcmp( const char* s, - size_t n, + int64_t n, const char* needle, size_t needleSize) { static_assert(kNeedleSize >= 2); @@ -1471,17 +1464,10 @@ size_t FOLLY_ALWAYS_INLINE smidStrstrMemcmp( VELOX_DCHECK_GT(n, 0); auto first = CharVector::broadcast(needle[0]); auto last = CharVector::broadcast(needle[needleSize - 1]); - size_t i = 0; - - for (; i <= n - needleSize; i += CharVector::size) { - // Assume that the input string is allocated on virtual pages : VP1, VP2, - // VP3 and VP4 has not been allocated yet, we need to check the end of input - // string is page-safe to over-read CharVector. - const auto lastPos = i + needleSize - 1; + int64_t i = 0; - if (lastPos + CharVector::size > n && !pageSafe(s + lastPos)) { - break; - } + for (int64_t end = n - needleSize - CharVector::size; i <= end; + i += CharVector::size) { auto blockFirst = CharVector::load_unaligned(s + i); const auto eqFirst = (first == blockFirst); /// std:find handle the fast-path for first-char-unmatch, so we also need @@ -1489,12 +1475,12 @@ size_t FOLLY_ALWAYS_INLINE smidStrstrMemcmp( if (eqFirst.mask() == 0) { continue; } - auto blockLast = CharVector::load_unaligned(s + lastPos); + auto blockLast = CharVector::load_unaligned(s + i + needleSize - 1); const auto eqLast = (last == blockLast); auto mask = (eqFirst && eqLast).mask(); while (mask != 0) { const auto bitpos = __builtin_ctz(mask); - if constexpr (compiled) { + if constexpr (kCompiled) { if constexpr (kNeedleSize == 2) { return i + bitpos; } @@ -1510,15 +1496,12 @@ size_t FOLLY_ALWAYS_INLINE smidStrstrMemcmp( } } // Fallback path for generic path. - for (; i <= n - needleSize; ++i) { - if constexpr (compiled) { - if (memcmp(s + i, needle, kNeedleSize) == 0) { - return i; - } + if (i + needleSize <= n) { + std::string_view sv(s, n); + if constexpr (kCompiled) { + return sv.find(needle, i, kNeedleSize); } else { - if (memcmp(s + i, needle, needleSize) == 0) { - return i; - } + return sv.find(needle, i, needleSize); } } diff --git a/velox/common/base/SimdUtil.cpp b/velox/common/base/SimdUtil.cpp index 18a70d118ebd..03576ac31ec4 100644 --- a/velox/common/base/SimdUtil.cpp +++ b/velox/common/base/SimdUtil.cpp @@ -62,7 +62,6 @@ const LeadingMask leadingMask64; const FromBitMask fromBitMask32; const FromBitMask fromBitMask64; -const int kPageSize = sysconf(_SC_PAGESIZE); } // namespace detail namespace {