Skip to content

Commit

Permalink
Merge pull request #729 from CHIP-SPV/rtdevlib-ballot
Browse files Browse the repository at this point in the history
Link __ballot() implementation at runtime
  • Loading branch information
pvelesko authored Dec 19, 2023
2 parents d31f904 + 64fd61e commit c2f604a
Show file tree
Hide file tree
Showing 16 changed files with 159 additions and 36 deletions.
2 changes: 1 addition & 1 deletion .github/actions/get-shas/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ runs:
- id: spirv-llvm-sha
run: scripts/get_sha.sh >> $GITHUB_OUTPUT
env:
URL: https://api.github.com/repos/CHIP-SPV/SPIRV-LLVM-Translator/git/ref/heads/chipStar-llvm-${{ inputs.version }}
URL: https://api.github.com/repos/KhronosGroup/SPIRV-LLVM-Translator/git/ref/heads/llvm_release_${{ inputs.version }}0
TOKEN: u:${{github.token}}
shell: bash
- id: pocl-sha
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/presubmit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ jobs:
if: steps.llvm.outputs.cache-hit != 'true'
- uses: actions/checkout@v3
with:
repository: CHIP-SPV/SPIRV-LLVM-Translator
repository: KhronosGroup/SPIRV-LLVM-Translator
ref: ${{ steps.get-shas.outputs.spirv-llvm-sha }}
path: llvm-${{ matrix.version }}/llvm/projects/SPIRV-LLVM-Translator
if: steps.llvm.outputs.cache-hit != 'true'
Expand Down
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ Release notes for [1.1](docs/release_notes/chipStar_1.1.rst), [1.0](docs/release
(e.g. llvm\_release\_170 for LLVM 17)
[llvm-spirv](https://github.com/KhronosGroup/SPIRV-LLVM-Translator).
* Make sure the built llvm-spirv binary is installed into the same path as clang binary, otherwise clang might find and use a different llvm-spirv, leading to errors.
* For the best results, install it from a chipStar [branch](https://github.com/CHIP-SPV/SPIRV-LLVM-Translator/tree/chipStar-llvm-17) which has fixes that are not yet upstreamed.

### Compiling Clang, LLVM and SPIRV-LLVM-Translator

Expand Down
45 changes: 35 additions & 10 deletions bitcode/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ endif()

set(BITCODE_CL_COMPILE_FLAGS
-Xclang -finclude-default-header -O2 -x cl -cl-std=CL2.0
--target=${BC_TRIPLE} -DDEFAULT_WARP_SIZE=${DEFAULT_WARP_SIZE} -emit-llvm
-cl-ext=+cl_khr_subgroup --target=${BC_TRIPLE}
-DDEFAULT_WARP_SIZE=${DEFAULT_WARP_SIZE} -emit-llvm
${EXTRA_FLAGS})
set(BITCODE_C_COMPILE_FLAGS
--target=${BC_TRIPLE} -O2 -x c -DDEFAULT_WARP_SIZE=${DEFAULT_WARP_SIZE}
Expand Down Expand Up @@ -153,19 +154,25 @@ install(FILES "${CMAKE_BINARY_DIR}/${BC_DESTINATION}/${BC_FILE}"
# with the extension's atomic operations. Otherwise, the runtime links
# in a slower, emulated version.
#
# RTDEVLIB_SOURCES defines OpenCL C sources for the rtdevlib. They are
# RTDEVLIB_SOURCES* defines OpenCL C sources for the rtdevlib. They are
# compiled to SPIR-V binary and embedded into the CHIP
# library. <build-dir>/bitcode/rtdevlib-modules.h declares the
# embedded modules as 'std::array <basename-of-the-source>'.

#
# Use only characters allowed in C/C++/OpenCL-C language for the
# source names.
set(RTDEVLIB_SOURCES
#
# Sources requiring SPIR-V 1.2 at most.
set(RTDEVLIB_SOURCES_v1_2
atomicAddFloat_native atomicAddFloat_emulation
atomicAddDouble_native atomicAddDouble_emulation)

# Sources requiring SPIR-V 1.3 at most.
set(RTDEVLIB_SOURCES_v1_3
ballot_native)

# Alias for add_opencl_bitcode.
foreach(SOURCE IN LISTS RTDEVLIB_SOURCES)
foreach(SOURCE IN LISTS RTDEVLIB_SOURCES_v1_2 RTDEVLIB_SOURCES_v1_3)
add_opencl_bitcode(
"${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE}.cl"
"${CMAKE_CURRENT_BINARY_DIR}/${SOURCE}.bc")
Expand All @@ -176,7 +183,8 @@ add_custom_target("rtdevlib-bitcodes" DEPENDS ${RTDEVLIB_BITCODES})

# Compile LLVM bitcode to SPIR-V binary which is then embedded into
# std::array<unsigned char, N> <ARRAY_NAME>.
function(embed_spirv_in_cpp ARRAY_NAME BC_SOURCE OUTPUT_SOURCE OUTPUT_HEADER)
function(embed_spirv_in_cpp
ARRAY_NAME BC_SOURCE OUTPUT_SOURCE OUTPUT_HEADER MAX_SPIRV_VERSION)
set(SPIRV_EXTENSIONS "+SPV_EXT_shader_atomic_float_add")
get_filename_component(SOURCE_BASENAME "${BC_SOURCE}" NAME_WLE)
# Name of the intermediate SPIR-V binary. The name of the C array will be
Expand All @@ -186,7 +194,9 @@ function(embed_spirv_in_cpp ARRAY_NAME BC_SOURCE OUTPUT_SOURCE OUTPUT_HEADER)
OUTPUT "${OUTPUT_SOURCE}" "${OUTPUT_HEADER}"
DEPENDS "${BC_SOURCE}"
BYPRODUCTS "${SPIR_BINARY}"
COMMAND "${LLVM_SPIRV}" --spirv-ext=${SPIRV_EXTENSIONS}
COMMAND "${LLVM_SPIRV}"
--spirv-ext=${SPIRV_EXTENSIONS}
--spirv-max-version=${MAX_SPIRV_VERSION}
"${BC_SOURCE}" -o "${SPIR_BINARY}"
COMMAND ${CMAKE_SOURCE_DIR}/scripts/embed-binary-in-cpp.bash
${ARRAY_NAME} ${SPIR_BINARY} ${OUTPUT_SOURCE} ${OUTPUT_HEADER}
Expand All @@ -195,13 +205,27 @@ function(embed_spirv_in_cpp ARRAY_NAME BC_SOURCE OUTPUT_SOURCE OUTPUT_HEADER)
)
endfunction()

foreach(SOURCE IN LISTS RTDEVLIB_SOURCES)
foreach(SOURCE IN LISTS RTDEVLIB_SOURCES_v1_2)
embed_spirv_in_cpp(
${SOURCE}
"${CMAKE_CURRENT_BINARY_DIR}/${SOURCE}.bc"
"${CMAKE_CURRENT_BINARY_DIR}/${SOURCE}.cc"
"${CMAKE_CURRENT_BINARY_DIR}/${SOURCE}.h"
"1.2")
list(APPEND RTDEVLIB_SOURCES_v1_2
"${CMAKE_CURRENT_BINARY_DIR}/${SOURCE}.cc")
list(APPEND RTDEVLIB_HEADERS
"${CMAKE_CURRENT_BINARY_DIR}/${SOURCE}.h")
list(APPEND RTDEVLIB_SOURCES
endforeach()

foreach(SOURCE IN LISTS RTDEVLIB_SOURCES_v1_3)
embed_spirv_in_cpp(
${SOURCE}
"${CMAKE_CURRENT_BINARY_DIR}/${SOURCE}.bc"
"${CMAKE_CURRENT_BINARY_DIR}/${SOURCE}.cc"
"${CMAKE_CURRENT_BINARY_DIR}/${SOURCE}.h"
"1.3")
list(APPEND RTDEVLIB_SOURCES_v1_3
"${CMAKE_CURRENT_BINARY_DIR}/${SOURCE}.cc")
list(APPEND RTDEVLIB_HEADERS
"${CMAKE_CURRENT_BINARY_DIR}/${SOURCE}.h")
Expand All @@ -220,4 +244,5 @@ add_custom_command(

add_custom_target(rtdevlib-header DEPENDS ${RTDEVLIB_HEADER})

add_library(rtdevlib OBJECT ${RTDEVLIB_SOURCES} ${RTDEVLIB_HEADER})
add_library(rtdevlib OBJECT
${RTDEVLIB_SOURCES_v1_2} ${RTDEVLIB_SOURCES_v1_3} ${RTDEVLIB_HEADER})
37 changes: 37 additions & 0 deletions bitcode/ballot_native.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/*
* Copyright (c) 2023 chipStar developers
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
// __ballot implementation with sub_group_ballot().

// SPIR-V requirements: 1.3+ unless environment supports
// cl_khr_subgroup_ballot or ZE_extension_subgroups.
// OpenCL requirements: cl_khr_subgroup_ballot.
// Level0 requirements: ZE_extension_subgroups.

#define OVERLOADED __attribute__((overloadable))

OVERLOADED ulong __chip_ballot(int predicate) {
#if DEFAULT_WARP_SIZE <= 32
return sub_group_ballot(predicate).x;
#else
return sub_group_ballot(predicate).x | (sub_group_ballot(predicate).y << 32);
#endif
}
10 changes: 2 additions & 8 deletions bitcode/devicelib.cl
Original file line number Diff line number Diff line change
Expand Up @@ -820,14 +820,8 @@ __SHFL_DOWN(ulong);
__SHFL_DOWN(float);
__SHFL_DOWN(double);

__attribute__((overloadable)) uint4 sub_group_ballot(int predicate);
EXPORT OVLD ulong __chip_ballot(int predicate) {
#if DEFAULT_WARP_SIZE <= 32
return sub_group_ballot(predicate).x;
#else
return sub_group_ballot(predicate).x | (sub_group_ballot(predicate).y << 32);
#endif
}
// The definition is linked at runtime from one of the ballot*.cl files.
EXPORT OVLD ulong __chip_ballot(int predicate);

EXPORT OVLD int __chip_all(int predicate) {
return __chip_ballot(predicate) == ~0;
Expand Down
6 changes: 3 additions & 3 deletions scripts/configure_llvm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,10 @@ LINK_TYPE=$3
# set the brach name for checkuot based on only-necessary-spirv-exts
if [ "$4" == "on" ]; then
LLVM_BRANCH="spirv-ext-fixes-${VERSION}"
TRANSLATOR_BRANCH="chipStar-llvm-${VERSION}"
TRANSLATOR_BRANCH="llvm_release_${VERSION}0"
else
LLVM_BRANCH="chipStar-llvm-${VERSION}"
TRANSLATOR_BRANCH="chipStar-llvm-${VERSION}"
TRANSLATOR_BRANCH="llvm_release_${VERSION}0"
fi

export LLVM_DIR=`pwd`/llvm-project/llvm
Expand All @@ -49,7 +49,7 @@ export LLVM_DIR=`pwd`/llvm-project/llvm
if [ ! -d llvm-project ]; then
git clone https://github.com/CHIP-SPV/llvm-project.git -b ${LLVM_BRANCH} --depth 1
cd ${LLVM_DIR}/projects
git clone https://github.com/CHIP-SPV/SPIRV-LLVM-Translator.git -b ${TRANSLATOR_BRANCH} --depth 1
git clone https://github.com/KhronosGroup/SPIRV-LLVM-Translator.git -b ${TRANSLATOR_BRANCH} --depth 1
cd ${LLVM_DIR}
else
# Warn the user, error out
Expand Down
6 changes: 3 additions & 3 deletions scripts/unit_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,13 @@ build_type=$(echo "$1" | tr '[:lower:]' '[:upper:]')

if [ "$2" == "llvm-15" ]; then
LLVM=llvm-15
CLANG=llvm/15.0/dynamic
CLANG=llvm/15.0-unpatched-spirv
elif [ "$2" == "llvm-16" ]; then
LLVM=llvm-16
CLANG=llvm/16.0/dynamic
CLANG=llvm/16.0-unpatched-spirv
elif [ "$2" == "llvm-17" ]; then
LLVM=llvm-17
CLANG=llvm/17.0/dynamic
CLANG=llvm/17.0-unpatched-spirv
else
echo "$2"
echo "Invalid 2nd argument. Use either 'llvm-15', 'llvm-16' or 'llvm-17'."
Expand Down
2 changes: 2 additions & 0 deletions src/CHIPBackend.hh
Original file line number Diff line number Diff line change
Expand Up @@ -1596,6 +1596,8 @@ public:
void invalidateDeviceVariables();
void deallocateDeviceVariables();

virtual bool hasBallot() const noexcept { return false; }

protected:
/**
* @brief The backend hook for reset().
Expand Down
4 changes: 4 additions & 0 deletions src/backend/Level0/CHIPBackendLevel0.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2501,6 +2501,10 @@ static void appendDeviceLibrarySources(
else
AppendSource(chipstar::atomicAddDouble_emulation);

// OpGroupNonUniformBallot instructions seems to compile and work
// despite not having ZE_extension_subgroups.
AppendSource(chipstar::ballot_native);

assert(SrcSizes.size() == Sources.size() &&
Sources.size() == BuildFlags.size());
}
Expand Down
2 changes: 2 additions & 0 deletions src/backend/Level0/CHIPBackendLevel0.hh
Original file line number Diff line number Diff line change
Expand Up @@ -620,6 +620,8 @@ public:
const ze_float_atomic_ext_properties_t &getFpAtomicProps() const noexcept {
return FpAtomicProps_;
}

bool hasBallot() const noexcept override { return true; }
};

class CHIPBackendLevel0 : public chipstar::Backend {
Expand Down
23 changes: 16 additions & 7 deletions src/backend/OpenCL/CHIPBackendOpenCL.cc
Original file line number Diff line number Diff line change
Expand Up @@ -407,6 +407,10 @@ CHIPDeviceOpenCL::CHIPDeviceOpenCL(CHIPContextOpenCL *ChipCtx,
*DevIn, CL_DEVICE_SINGLE_FP_ATOMIC_CAPABILITIES_EXT);
Fp64AtomicAddCapabilities_ = getFPAtomicCapabilities(
*DevIn, CL_DEVICE_DOUBLE_FP_ATOMIC_CAPABILITIES_EXT);

auto DevExts = DevIn->getInfo<CL_DEVICE_EXTENSIONS>();
HasSubgroupBallot_ =
DevExts.find("cl_khr_subgroup_ballot") != std::string::npos;
}

CHIPDeviceOpenCL *CHIPDeviceOpenCL::create(cl::Device *ClDevice,
Expand Down Expand Up @@ -789,18 +793,23 @@ static void appendRuntimeObjects(cl::Context Ctx, CHIPDeviceOpenCL &ChipDev,

// TODO: Reuse already compiled modules.

auto AppendSource = [&](auto &Source) -> void {
Objects.push_back(compileIL(Ctx, ChipDev, Source));
};

if (ChipDev.hasFP32AtomicAdd())
Objects.push_back(compileIL(Ctx, ChipDev, chipstar::atomicAddFloat_native));
AppendSource(chipstar::atomicAddFloat_native);
else
Objects.push_back(
compileIL(Ctx, ChipDev, chipstar::atomicAddFloat_emulation));
AppendSource(chipstar::atomicAddFloat_emulation);

if (ChipDev.hasFP64AtomicAdd())
Objects.push_back(
compileIL(Ctx, ChipDev, chipstar::atomicAddDouble_native));
AppendSource(chipstar::atomicAddDouble_native);
else
Objects.push_back(
compileIL(Ctx, ChipDev, chipstar::atomicAddDouble_emulation));
AppendSource(chipstar::atomicAddDouble_emulation);

if (ChipDev.hasBallot())
AppendSource(chipstar::ballot_native);
// No fall-back implementation for ballot - let linker raise an error.
}

void CHIPModuleOpenCL::compile(chipstar::Device *ChipDev) {
Expand Down
3 changes: 3 additions & 0 deletions src/backend/OpenCL/CHIPBackendOpenCL.hh
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,7 @@ private:

cl_device_fp_atomic_capabilities_ext Fp32AtomicAddCapabilities_;
cl_device_fp_atomic_capabilities_ext Fp64AtomicAddCapabilities_;
bool HasSubgroupBallot_ = false;

public:
~CHIPDeviceOpenCL() override {
Expand Down Expand Up @@ -243,6 +244,8 @@ public:
return (Fp64AtomicAddCapabilities_ & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) &&
(Fp64AtomicAddCapabilities_ & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT);
}

bool hasBallot() const noexcept override { return HasSubgroupBallot_; }
};

class CHIPQueueOpenCL : public chipstar::Queue {
Expand Down
8 changes: 6 additions & 2 deletions src/spirv.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include <vector>
#include <unordered_map>
#include <memory>
#include <regex>

#include "common.hh"
#include "spirv.hh"
Expand Down Expand Up @@ -810,6 +811,9 @@ bool filterSPIRV(const char *Bytes, size_t NumBytes, std::string &Dst) {
Dst.reserve(NumBytes);
Dst.append(Bytes, (const char *)WordsPtr); // Copy the header.

// Matches chipStar device library and SPIR-V translator symbols.
auto CompilerMagicSymbol =
std::regex(R"RE((__spirv_|__chip_|_Z\d*__chip_).*)RE");
std::set<std::string_view> EntryPoints;
std::unordered_set<InstWord> BuiltIns;
std::unordered_map<InstWord, std::string_view> MissingDefs;
Expand Down Expand Up @@ -858,8 +862,8 @@ bool filterSPIRV(const char *Bytes, size_t NumBytes, std::string &Dst) {
//
// Issue warning unless it's a builtin, magic chipStar or
// llvm-spirv symbol.
if (!startsWith(LinkName, "spirv_") &&
!startsWith(LinkName, "__chip_") &&
if (!std::regex_match(LinkName.begin(), LinkName.end(),
CompilerMagicSymbol) &&
!BuiltIns.count(Insn.getWord(1)))
MissingDefs[Insn.getWord(1)] = LinkName;
}
Expand Down
2 changes: 2 additions & 0 deletions tests/runtime/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -93,3 +93,5 @@ add_hip_runtime_test(TestBitInsert.hip)
# Imports __bitinsert_*.
target_compile_definitions(TestBitInsert
PRIVATE CHIP_ENABLE_NON_COMPLIANT_DEVICELIB_CODE)

add_hip_runtime_test(TestBallot.hip)
42 changes: 42 additions & 0 deletions tests/runtime/TestBallot.hip
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#include "CHIPDriver.hh"
#include <hip/hip_runtime.h>
#include <cstdint>
#include <cstdlib>

__global__ void k(uint64_t *Out, uint64_t In) {
auto Votes = __ballot((In >> threadIdx.x) & 1u);
if (threadIdx.x == 0)
*Out = Votes;
}

void checkBallot(unsigned BlockSize, uint64_t In) {
uint64_t *OutD, OutH = ~In;
(void)hipMalloc(&OutD, sizeof(uint64_t));
k<<<1, BlockSize>>>(OutD, In);
(void)hipMemcpy(&OutH, OutD, sizeof(uint64_t), hipMemcpyHostToDevice);

hipDeviceProp_t Props{};
(void)hipGetDeviceProperties(&Props, 0);

if (Props.warpSize < 64 && OutH >> Props.warpSize) {
printf("Error: Garbage bits in the __ballot result!\n");
exit(1);
}

if (OutH != In) {
printf("BlockSize=%u:\nError: Expected '%lu'. Got '%lu'\n", BlockSize, In,
OutH);
exit(1);
}
(void)hipFree(OutD);
}

int main() {
if (!Backend->getActiveDevice()->hasBallot()) {
printf("SKIP: device does not support __ballot()\n");
return 2;
}

checkBallot(32, 0xBADF00D1);
return 0;
}

0 comments on commit c2f604a

Please sign in to comment.