From 5869b2756cbeebcd1a2bc7ca7d3d3516eb7038e9 Mon Sep 17 00:00:00 2001 From: Wayne Franz Date: Fri, 1 Nov 2024 11:34:51 -0400 Subject: [PATCH] Mergeback 6.3 fixes (#420) * Revert Bit Twiddle change from PR #377 (#397) An update to the TwiddleIn/Out functions from PR #377 seems to be causing a build failure in onnxruntime. This change reverts the single commit (0721c2c3bab6f87d98099ab48730369d57627d1d) that made those changes. We can re-apply the change with an appropriate fix in the future. Note: the commits in the PR were squashed, so that commit will not show up in the log. * Remove website URL from comments (#398) Referencing or using code from some websites is prohibited in this repository. This change removes an informational reference in the comments. * Add gfx1151 target (#399) (#401) Co-authored-by: Stanley Tsang * Spolifroni amd/624 changelogcleanup upcoming (#411) * edited to conform to standards * edited to conform to standards * updated the changelog for 6.3 (#418) --------- Co-authored-by: amd-garydeng Co-authored-by: Stanley Tsang Co-authored-by: spolifroni-amd --- CHANGELOG.md | 13 ++++++----- .../hipcub/backend/rocprim/util_type.hpp | 22 ++++++++++--------- scripts/copyright-date/check-copyright.sh | 1 - 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2a43cd21..4a455f3e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,6 @@ # Changelog for hipCUB -Documentation for hipCUB is available at -[https://rocm.docs.amd.com/projects/hipCUB/en/latest/](https://rocm.docs.amd.com/projects/hipCUB/en/latest/). +Full documentation for hipCUB is available at [https://rocm.docs.amd.com/projects/hipCUB/en/latest/](https://rocm.docs.amd.com/projects/hipCUB/en/latest/). ## (Unreleased) hipCUB-x.x.x for ROCm 6.4.0 @@ -9,14 +8,14 @@ Documentation for hipCUB is available at * Added `ForEach`, `ForEachN`, `ForEachCopy`, `ForEachCopyN` and `Bulk` functions to have parity with CUB. * Added the `hipcub::CubVector` type for CUB parity. -## (Unreleased) hipCUB-3.3.0 for ROCm 6.3.0 +## hipCUB-3.3.0 for ROCm 6.3.0 ### Fixed * Not all headers in hipCUB included `config.hpp` which could have resulted in build errors. ### Added -* Add support for large indices in `hipcub::DeviceSegmentedReduce::*`. rocPRIM's backend provides support for all reduce variants, but CUB's does not have support yet for `DeviceSegmentedReduce::Arg*`, so large indices support has been excluded for these as well in hipCUB. +* Support for large indices in `hipcub::DeviceSegmentedReduce::*` has been added, with the exception of `DeviceSegmentedReduce::Arg*`. Although rocPRIM's backend provides support for all reduce variants, CUB does not support large indices in `DeviceSegmentedReduce::Arg*`. For this reason, large index support is not available for `hipcub::DeviceSegmentedReduce::Arg*`. * Add -t smoke option in rtest.py. It will run a subset of tests such that the total test time is in 5 minutes. Use python3 ./rtest.py --test smoke or python3 ./rtest.py -t smoke to execute smoke test. * Add inplace overloads of `DeviceScan` functions. * Add inplace overloads of `DeviceSelect::Flagged` and `DeviceSelect::If`. @@ -27,7 +26,11 @@ Documentation for hipCUB is available at ### Changed * The NVIDIA backend now requires CUB, Thrust and libcu++ 2.4.0. If it is not found it will be downloaded from the NVIDIA CCCL repository. -## (Unreleased) hipCUB-3.2.0 for ROCm 6.2.0 +### Resolved issues + +* Fixed an issue where `config.hpp` was not included in all hipCUB headers, resulting in build errors. + +## hipCUB-3.2.0 for ROCm 6.2.0 ### Added * Add `DeviceCopy` function to have parity with CUB. diff --git a/hipcub/include/hipcub/backend/rocprim/util_type.hpp b/hipcub/include/hipcub/backend/rocprim/util_type.hpp index dd550e55..91e2ac5c 100644 --- a/hipcub/include/hipcub/backend/rocprim/util_type.hpp +++ b/hipcub/include/hipcub/backend/rocprim/util_type.hpp @@ -547,12 +547,12 @@ struct BaseTraits static HIPCUB_HOST_DEVICE __forceinline__ UnsignedBits TwiddleIn(UnsignedBits key) { - return key_codec::encode(rocprim::detail::bit_cast(key)); + return key; } static HIPCUB_HOST_DEVICE __forceinline__ UnsignedBits TwiddleOut(UnsignedBits key) { - return key_codec::decode(rocprim::detail::bit_cast(key)); + return key; } static HIPCUB_HOST_DEVICE __forceinline__ T Max() @@ -596,12 +596,12 @@ struct BaseTraits static HIPCUB_HOST_DEVICE __forceinline__ UnsignedBits TwiddleIn(UnsignedBits key) { - return key_codec::encode(rocprim::detail::bit_cast(key)); + return key ^ HIGH_BIT; }; static HIPCUB_HOST_DEVICE __forceinline__ UnsignedBits TwiddleOut(UnsignedBits key) { - return key_codec::decode(rocprim::detail::bit_cast(key)); + return key ^ HIGH_BIT; }; static HIPCUB_HOST_DEVICE __forceinline__ T Max() @@ -695,12 +695,14 @@ struct BaseTraits static HIPCUB_HOST_DEVICE __forceinline__ UnsignedBits TwiddleIn(UnsignedBits key) { - return key_codec::encode(rocprim::detail::bit_cast(key)); + UnsignedBits mask = (key & HIGH_BIT) ? UnsignedBits(-1) : HIGH_BIT; + return key ^ mask; }; static HIPCUB_HOST_DEVICE __forceinline__ UnsignedBits TwiddleOut(UnsignedBits key) { - return key_codec::decode(rocprim::detail::bit_cast(key)); + UnsignedBits mask = (key & HIGH_BIT) ? HIGH_BIT : UnsignedBits(-1); + return key ^ mask; }; static HIPCUB_HOST_DEVICE __forceinline__ T Max() { @@ -751,12 +753,12 @@ struct NumericTraits<__uint128_t> static __host__ __device__ __forceinline__ UnsignedBits TwiddleIn(UnsignedBits key) { - return key_codec::encode(rocprim::detail::bit_cast(key)); + return key; } static __host__ __device__ __forceinline__ UnsignedBits TwiddleOut(UnsignedBits key) { - return key_codec::decode(rocprim::detail::bit_cast(key)); + return key; } static __host__ __device__ __forceinline__ T Max() @@ -788,12 +790,12 @@ struct NumericTraits<__int128_t> static __host__ __device__ __forceinline__ UnsignedBits TwiddleIn(UnsignedBits key) { - return key_codec::encode(rocprim::detail::bit_cast(key)); + return key ^ HIGH_BIT; }; static __host__ __device__ __forceinline__ UnsignedBits TwiddleOut(UnsignedBits key) { - return key_codec::decode(rocprim::detail::bit_cast(key)); + return key ^ HIGH_BIT; }; static __host__ __device__ __forceinline__ T Max() diff --git a/scripts/copyright-date/check-copyright.sh b/scripts/copyright-date/check-copyright.sh index 98b6d407..3de80af0 100755 --- a/scripts/copyright-date/check-copyright.sh +++ b/scripts/copyright-date/check-copyright.sh @@ -61,7 +61,6 @@ if $forkdiff; then source_commit="remotes/$remote/HEAD" # don't use fork-point for finding fork point (lol) - # see: https://stackoverflow.com/a/53981615 diff_hash="$(git merge-base "$source_commit" "$branch")" fi