From b87bb9b92edf659f63d7d773d3e958159f49df89 Mon Sep 17 00:00:00 2001 From: Denis Yaroshevskiy Date: Fri, 27 Sep 2024 06:24:54 -0700 Subject: [PATCH] simd::contains (the interfaces) (#2299) Summary: Pull Request resolved: https://github.com/facebook/folly/pull/2299 simdContains - everything but the actual handwritten algorithm. Reviewed By: Gownta Differential Revision: D63116101 fbshipit-source-id: 2c9b23f0111f0fa2f703ca13e8cd3a1097c685fd --- CMakeLists.txt | 4 +- folly/algorithm/simd/BUCK | 13 +++ folly/algorithm/simd/Contains.cpp | 42 +++++++++ folly/algorithm/simd/Contains.h | 65 +++++++++++++ folly/algorithm/simd/FindFixed.h | 3 +- folly/algorithm/simd/detail/BUCK | 12 +++ folly/algorithm/simd/detail/ContainsImpl.h | 91 +++++++++++++++++++ folly/algorithm/simd/detail/Traits.h | 40 +++----- .../algorithm/simd/detail/test/TraitsTest.cpp | 10 +- folly/algorithm/simd/test/BUCK | 11 +++ folly/algorithm/simd/test/ContainsTest.cpp | 81 +++++++++++++++++ 11 files changed, 336 insertions(+), 36 deletions(-) create mode 100644 folly/algorithm/simd/Contains.cpp create mode 100644 folly/algorithm/simd/Contains.h create mode 100644 folly/algorithm/simd/detail/ContainsImpl.h create mode 100644 folly/algorithm/simd/test/ContainsTest.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index c30fc9938ed..2f441559bb5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -649,13 +649,13 @@ if (BUILD_TESTS OR BUILD_BENCHMARKS) DIRECTORY algorithm/simd/detail/test/ TEST algorithm_simd_detail_simd_any_of_test SOURCES SimdAnyOfTest.cpp TEST algorithm_simd_detail_simd_for_each_test SOURCES SimdForEachTest.cpp + TEST algorithm_simd_detail_simd_traits_test SOURCES TraitsTest.cpp TEST algorithm_simd_detail_unroll_utils_test SOURCES UnrollUtilsTest.cpp - # disabled until C++20 - # TEST algorithm_simd_detail_simd_traits_test SOURCES TraitsTest.cpp DIRECTORY algorithm/simd/test/ TEST algorithm_simd_find_fixed_test SOURCES FindFixedTest.cpp TEST algorithm_simd_movemask_test SOURCES 
MovemaskTest.cpp + TEST algorithm_simd_simd_contains_test SOURCES ContainsTest.cpp DIRECTORY chrono/test/ TEST chrono_conv_test WINDOWS_DISABLED diff --git a/folly/algorithm/simd/BUCK b/folly/algorithm/simd/BUCK index 1343e8f9b73..27c0471325f 100644 --- a/folly/algorithm/simd/BUCK +++ b/folly/algorithm/simd/BUCK @@ -22,3 +22,16 @@ cpp_library( "//folly/algorithm/simd/detail:traits", ], ) + +cpp_library( + name = "contains", + srcs = ["Contains.cpp"], + headers = ["Contains.h"], + deps = [ + "//folly/algorithm/simd/detail:simd_contains_impl", + ], + exported_deps = [ + "//folly:c_portability", + "//folly/algorithm/simd/detail:traits", + ], +) diff --git a/folly/algorithm/simd/Contains.cpp b/folly/algorithm/simd/Contains.cpp new file mode 100644 index 00000000000..d5654714764 --- /dev/null +++ b/folly/algorithm/simd/Contains.cpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include +#include +#include + +namespace folly::simd::detail { + +bool containsU8(folly::span haystack, std::uint8_t needle) { + return containsImpl(haystack, needle); +} +bool containsU16( + folly::span haystack, std::uint16_t needle) { + return containsImpl(haystack, needle); +} +bool containsU32( + folly::span haystack, std::uint32_t needle) { + return containsImpl(haystack, needle); +} + +bool containsU64( + folly::span haystack, std::uint64_t needle) { + return containsImpl(haystack, needle); +} + +} // namespace folly::simd::detail diff --git a/folly/algorithm/simd/Contains.h b/folly/algorithm/simd/Contains.h new file mode 100644 index 00000000000..968d96629e5 --- /dev/null +++ b/folly/algorithm/simd/Contains.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +#include + +namespace folly::simd { +namespace detail { + +// no overloading for ease of profiling. 
+ +bool containsU8(folly::span haystack, std::uint8_t needle); +bool containsU16( + folly::span haystack, std::uint16_t needle); +bool containsU32( + folly::span haystack, std::uint32_t needle); +bool containsU64( + folly::span haystack, std::uint64_t needle); + +} // namespace detail + +struct contains_fn { + template + requires simd::detail::has_integral_simd_friendly_equivalent< + std::ranges::range_value_t> + FOLLY_ERASE bool operator()(R&& rng, std::ranges::range_value_t x) const { + auto castRng = simd::detail::asSimdFriendlyUint(folly::span(rng)); + auto castX = simd::detail::asSimdFriendlyUint(x); + + using T = decltype(castX); + + if constexpr (std::is_same_v) { + return detail::containsU8(castRng, castX); + } else if constexpr (std::is_same_v) { + return detail::containsU16(castRng, castX); + } else if constexpr (std::is_same_v) { + return detail::containsU32(castRng, castX); + } else { + static_assert( + std::is_same_v, "internal error, unknown type"); + return detail::containsU64(castRng, castX); + } + } +}; + +inline constexpr contains_fn contains; + +} // namespace folly::simd diff --git a/folly/algorithm/simd/FindFixed.h b/folly/algorithm/simd/FindFixed.h index 547b02a6b3a..455799ad206 100644 --- a/folly/algorithm/simd/FindFixed.h +++ b/folly/algorithm/simd/FindFixed.h @@ -293,7 +293,8 @@ constexpr std::optional findFixed(std::span where, U x) return find_fixed_detail::findFixedConstexpr(std::span(where), x); } else { return find_fixed_detail::findFixedDispatch( - detail::asSimdFriendlyUint(where), detail::asSimdFriendlyUint(x)); + simd::detail::asSimdFriendlyUint(where), + simd::detail::asSimdFriendlyUint(x)); } } diff --git a/folly/algorithm/simd/detail/BUCK b/folly/algorithm/simd/detail/BUCK index 53a172a2e6b..1cdebfb1abd 100644 --- a/folly/algorithm/simd/detail/BUCK +++ b/folly/algorithm/simd/detail/BUCK @@ -26,6 +26,17 @@ cpp_library( ], ) +cpp_library( + name = "simd_contains_impl", + headers = ["ContainsImpl.h"], + exported_deps = [ + 
":simd_any_of", + ":simd_char_platform", + "//folly:c_portability", + "//folly/container:span", + ], +) + cpp_library( name = "simd_for_each", headers = ["SimdForEach.h"], @@ -40,6 +51,7 @@ cpp_library( name = "traits", headers = ["Traits.h"], exported_deps = [ + "//folly:c_portability", "//folly:memory", "//folly:traits", "//folly/container:span", diff --git a/folly/algorithm/simd/detail/ContainsImpl.h b/folly/algorithm/simd/detail/ContainsImpl.h new file mode 100644 index 00000000000..66d049f4545 --- /dev/null +++ b/folly/algorithm/simd/detail/ContainsImpl.h @@ -0,0 +1,91 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace folly::simd::detail { + +/* + * The functions in this file are FOLLY_ERASE to make sure + * that the only place behind a call boundary is the explicit one. 
+ */ + +template +FOLLY_ERASE bool containsImplStd(folly::span haystack, T needle) { + static_assert( + std::is_unsigned_v, "we should only get here for uint8/16/32/64"); + if constexpr (sizeof(T) == 1) { + auto* ptr = reinterpret_cast(haystack.data()); + auto castNeedle = static_cast(needle); + if (haystack.empty()) { // memchr requires not null + return false; + } + return std::memchr(ptr, castNeedle, haystack.size()) != nullptr; + } else if constexpr (sizeof(T) == sizeof(wchar_t)) { + auto* ptr = reinterpret_cast(haystack.data()); + auto castNeedle = static_cast(needle); + if (haystack.empty()) { // wmemchr requires not null + return false; + } + return std::wmemchr(ptr, castNeedle, haystack.size()) != nullptr; + } else { + // Using find instead of any_of on an off chance that the standard library + // will add some custom vectorization. + // That wouldn't be possible for any_of because of the predicates. + return std::find(haystack.begin(), haystack.end(), needle) != + haystack.end(); + } +} + +template +constexpr bool hasHandwrittenContains() { + return std::is_same_v && + !std::is_same_v; +} + +template +FOLLY_ERASE bool containsImplHandwritten( + folly::span haystack, T needle) { + static_assert(std::is_same_v, ""); + auto as_chars = folly::reinterpret_span_cast(haystack); + return simdAnyOf( + as_chars.data(), + as_chars.data() + as_chars.size(), + [&](SimdCharPlatform::reg_t x) { + return SimdCharPlatform::equal(x, static_cast(needle)); + }); +} + +template +FOLLY_ERASE bool containsImpl(folly::span haystack, T needle) { + if constexpr (hasHandwrittenContains()) { + return containsImplHandwritten(haystack, needle); + } else { + return containsImplStd(haystack, needle); + } +} + +} // namespace folly::simd::detail diff --git a/folly/algorithm/simd/detail/Traits.h b/folly/algorithm/simd/detail/Traits.h index ee2ef3d5105..8658d8881b3 100644 --- a/folly/algorithm/simd/detail/Traits.h +++ b/folly/algorithm/simd/detail/Traits.h @@ -16,6 +16,7 @@ #pragma once 
+#include #include #include #include @@ -23,7 +24,7 @@ #include #include -namespace folly::detail { +namespace folly::simd::detail { template auto findSimdFriendlyEquivalent() { @@ -36,25 +37,9 @@ auto findSimdFriendlyEquivalent() { return double{}; } } else if constexpr (std::is_signed_v) { - if constexpr (sizeof(T) == 1) { - return std::int8_t{}; - } else if constexpr (sizeof(T) == 2) { - return std::int16_t{}; - } else if constexpr (sizeof(T) == 4) { - return std::int32_t{}; - } else if constexpr (sizeof(T) == 8) { - return std::int64_t{}; - } + return int_bits_t{}; } else if constexpr (std::is_unsigned_v) { - if constexpr (sizeof(T) == 1) { - return std::uint8_t{}; - } else if constexpr (sizeof(T) == 2) { - return std::uint16_t{}; - } else if constexpr (sizeof(T) == 4) { - return std::uint32_t{}; - } else if constexpr (sizeof(T) == 8) { - return std::uint64_t{}; - } + return uint_bits_t{}; } } @@ -63,7 +48,7 @@ concept has_simd_friendly_equivalent = !std::is_same_v())>; template -using simd_friendly_equivalent_t = folly::like_t< // +using simd_friendly_equivalent_t = like_t< // T, decltype(findSimdFriendlyEquivalent>())>; @@ -77,24 +62,23 @@ template using integral_simd_friendly_equivalent = simd_friendly_equivalent_t; template -auto asSimdFriendly(folly::span s) { - return folly::reinterpret_span_cast>(s); +FOLLY_ERASE auto asSimdFriendly(folly::span s) { + return reinterpret_span_cast>(s); } template -constexpr auto asSimdFriendly(T x) { +FOLLY_ERASE constexpr auto asSimdFriendly(T x) { return static_cast>(x); } template -auto asSimdFriendlyUint(folly::span s) { - return folly::reinterpret_span_cast< - folly::like_t>>(s); +FOLLY_ERASE auto asSimdFriendlyUint(folly::span s) { + return reinterpret_span_cast>>(s); } template -constexpr auto asSimdFriendlyUint(T x) { +FOLLY_ERASE constexpr auto asSimdFriendlyUint(T x) { return static_cast>(x); } -} // namespace folly::detail +} // namespace folly::simd::detail diff --git 
a/folly/algorithm/simd/detail/test/TraitsTest.cpp b/folly/algorithm/simd/detail/test/TraitsTest.cpp index 5d01d9630df..cb07707e8ea 100644 --- a/folly/algorithm/simd/detail/test/TraitsTest.cpp +++ b/folly/algorithm/simd/detail/test/TraitsTest.cpp @@ -19,9 +19,9 @@ #include #include -namespace folly::detail { +namespace folly::simd::detail { -struct FollySimdTraitsTest : testing::Test {}; +struct SimdTraitsTest : testing::Test {}; namespace simd_friendly_equivalent_test { @@ -98,7 +98,7 @@ static_assert(Overloading{}(SomeInt{}) == 2); } // namespace integral_simd_friendly_equivalent_test -TEST_F(FollySimdTraitsTest, AsSimdFriendly) { +TEST_F(SimdTraitsTest, AsSimdFriendly) { enum SomeEnum : int { Foo = 1, Bar, Baz }; static_assert(asSimdFriendly(SomeEnum::Foo) == 1); @@ -122,7 +122,7 @@ void asSimdFriendlyUintTypeTest() { asSimdFriendlyUint(std::span{}), std::span{}); } -TEST_F(FollySimdTraitsTest, AsSimdFriendlyUint) { +TEST_F(SimdTraitsTest, AsSimdFriendlyUint) { enum SomeEnum : int { Foo = 1, Bar, Baz }; static_assert(asSimdFriendlyUint(SomeEnum::Foo) == 1U); @@ -136,4 +136,4 @@ TEST_F(FollySimdTraitsTest, AsSimdFriendlyUint) { asSimdFriendlyUintTypeTest(); } -} // namespace folly::detail +} // namespace folly::simd::detail diff --git a/folly/algorithm/simd/test/BUCK b/folly/algorithm/simd/test/BUCK index 3844ae36967..ecb5a780ced 100644 --- a/folly/algorithm/simd/test/BUCK +++ b/folly/algorithm/simd/test/BUCK @@ -36,3 +36,14 @@ cpp_benchmark( "//folly/init:init", ], ) + +cpp_unittest( + name = "contains_test", + srcs = ["ContainsTest.cpp"], + headers = [], + deps = [ + "//folly/algorithm/simd:contains", + "//folly/algorithm/simd/detail:simd_contains_impl", + "//folly/portability:gtest", + ], +) diff --git a/folly/algorithm/simd/test/ContainsTest.cpp b/folly/algorithm/simd/test/ContainsTest.cpp new file mode 100644 index 00000000000..55aa4cca679 --- /dev/null +++ b/folly/algorithm/simd/test/ContainsTest.cpp @@ -0,0 +1,81 @@ +/* + * Copyright (c) Meta Platforms, 
Inc. and affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include + +#include + +namespace folly::simd { + +template +struct ContainsTest : ::testing::Test {}; + +using TypesToTest = ::testing::Types< + std::int8_t, + std::int16_t, + std::int32_t, + std::int64_t, + std::uint8_t, + std::uint16_t, + std::uint32_t, + std::uint64_t>; + +TYPED_TEST_SUITE(ContainsTest, TypesToTest); + +template +void testSimdContainsVerify(std::span haystack, T needle, bool expected) { + bool actual1 = simd::contains(haystack, needle); + ASSERT_EQ(expected, actual1); + + auto const_haystack = folly::static_span_cast(haystack); + + if constexpr ( + std::is_same_v || std::is_same_v || + std::is_same_v || std::is_same_v) { + bool actual2 = simd::detail::containsImplStd(const_haystack, needle); + ASSERT_EQ(expected, actual2) << " haystack.size(): " << haystack.size(); + } + + if constexpr (std::is_same_v) { + bool actual3 = + simd::detail::containsImplHandwritten(const_haystack, needle); + ASSERT_EQ(expected, actual3) << " haystack.size(): " << haystack.size(); + } +} + +TYPED_TEST(ContainsTest, Basic) { + using T = TypeParam; + + for (std::size_t size = 0; size != 100; ++size) { + std::vector buf(size, T{0}); + for (std::size_t offset = 0; offset != std::min(32UL, buf.size()); + ++offset) { + folly::span haystack(buf.begin() + offset, buf.end()); + T needle{1}; + testSimdContainsVerify(haystack, needle, /*expected*/ false); + + for (auto& x : 
haystack) { + x = needle; + testSimdContainsVerify(haystack, needle, /*expected*/ true); + x = 0; + } + } + } +} + +} // namespace folly::simd