diff --git a/cmake/continuations.cmake b/cmake/continuations.cmake index e6716ad654..f13118c443 100644 --- a/cmake/continuations.cmake +++ b/cmake/continuations.cmake @@ -25,21 +25,10 @@ set(LLPC_SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/..") -include("${LLPC_SOURCE_DIR}/cmake/llpc_version.cmake") -include("${LLPC_SOURCE_DIR}/cmake/compilerutils.cmake") +include("${LLPC_SOURCE_DIR}/cmake/llvmraytracing.cmake") -# Macro to add continuations and its dependencies as LLVM external projects. -# This appends the project names to LLVM_EXTERNAL_PROJECTS and sets each LLVM_EXTERNAL_*_SOURCE_DIR, -# all in the caller's scope. +# Deprecated transition macro for refactoring transition; use add_llvmraytracing_projects instead macro(add_continuations_projects) - add_llpc_version_projects() - add_compilerutils_projects() - if (NOT continuations IN_LIST LLVM_EXTERNAL_PROJECTS) - if (NOT llvm_dialects IN_LIST LLVM_EXTERNAL_PROJECTS) - list(APPEND LLVM_EXTERNAL_PROJECTS llvm_dialects) - set(LLVM_EXTERNAL_LLVM_DIALECTS_SOURCE_DIR "${LLPC_SOURCE_DIR}/imported/llvm-dialects") - endif() - list(APPEND LLVM_EXTERNAL_PROJECTS Continuations) - set(LLVM_EXTERNAL_CONTINUATIONS_SOURCE_DIR "${LLPC_SOURCE_DIR}/shared/continuations") - endif() + add_llvmraytracing_projects() + set(LLPC_RAYTRACING_ADD_TRANSITION_TARGETS ON) endmacro() diff --git a/cmake/lgc.cmake b/cmake/lgc.cmake index baab57084b..9d4a17f19c 100644 --- a/cmake/lgc.cmake +++ b/cmake/lgc.cmake @@ -26,14 +26,14 @@ set(LLPC_SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/..") include("${LLPC_SOURCE_DIR}/cmake/llpc_version.cmake") -include("${LLPC_SOURCE_DIR}/cmake/continuations.cmake") +include("${LLPC_SOURCE_DIR}/cmake/llvmraytracing.cmake") # Macro to add LGC and its dependencies as LLVM external projects. # This appends the project names to LLVM_EXTERNAL_PROJECTS and sets each LLVM_EXTERNAL_*_SOURCE_DIR, # all in the caller's scope. 
macro(add_lgc_projects) add_llpc_version_projects() - add_continuations_projects() + add_llvmraytracing_projects() if (NOT lgc IN_LIST LLVM_EXTERNAL_PROJECTS) if (NOT llvm_dialects IN_LIST LLVM_EXTERNAL_PROJECTS) list(APPEND LLVM_EXTERNAL_PROJECTS llvm_dialects) diff --git a/cmake/llvmraytracing.cmake b/cmake/llvmraytracing.cmake new file mode 100644 index 0000000000..8bed197403 --- /dev/null +++ b/cmake/llvmraytracing.cmake @@ -0,0 +1,45 @@ +## + ####################################################################################################################### + # + # Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All Rights Reserved. + # + # Permission is hereby granted, free of charge, to any person obtaining a copy + # of this software and associated documentation files (the "Software"), to + # deal in the Software without restriction, including without limitation the + # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + # sell copies of the Software, and to permit persons to whom the Software is + # furnished to do so, subject to the following conditions: + # + # The above copyright notice and this permission notice shall be included in all + # copies or substantial portions of the Software. + # + # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + # IN THE SOFTWARE. 
+ # + ####################################################################################################################### + +set(LLPC_SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/..") + +include("${LLPC_SOURCE_DIR}/cmake/llpc_version.cmake") +include("${LLPC_SOURCE_DIR}/cmake/compilerutils.cmake") + +# Macro to add raytracing and its dependencies as LLVM external projects. +# This appends the project names to LLVM_EXTERNAL_PROJECTS and sets each LLVM_EXTERNAL_*_SOURCE_DIR, +# all in the caller's scope. +macro(add_llvmraytracing_projects) + add_llpc_version_projects() + add_compilerutils_projects() + if (NOT raytracing IN_LIST LLVM_EXTERNAL_PROJECTS) + if (NOT llvm_dialects IN_LIST LLVM_EXTERNAL_PROJECTS) + list(APPEND LLVM_EXTERNAL_PROJECTS llvm_dialects) + set(LLVM_EXTERNAL_LLVM_DIALECTS_SOURCE_DIR "${LLPC_SOURCE_DIR}/imported/llvm-dialects") + endif() + list(APPEND LLVM_EXTERNAL_PROJECTS raytracing) + set(LLVM_EXTERNAL_RAYTRACING_SOURCE_DIR "${LLPC_SOURCE_DIR}/llvmraytracing") + endif() +endmacro() diff --git a/compilerutils/lib/CompilerUtils.cpp b/compilerutils/lib/CompilerUtils.cpp index a2b50ab0f3..dd82f770ac 100644 --- a/compilerutils/lib/CompilerUtils.cpp +++ b/compilerutils/lib/CompilerUtils.cpp @@ -114,6 +114,10 @@ Function *CompilerUtils::cloneFunctionHeader(Function &f, FunctionType *newType, } else { // Insert new function before f to facilitate writing tests f.getParent()->getFunctionList().insert(f.getIterator(), newFunc); +#if !defined(LLVM_MAIN_REVISION) || LLVM_MAIN_REVISION >= 489715 + // If targetModule is null then take flag from original function. + newFunc->setIsNewDbgInfoFormat(f.IsNewDbgInfoFormat); +#endif } newFunc->copyAttributesFrom(&f); @@ -137,8 +141,8 @@ namespace { std::string getCrossModuleName(GlobalValue &gv) { if (auto *fn = dyn_cast(&gv)) { // Intrinsics should not be renamed since the IR verifier insists on a "correct" name mangling based on any - // overloaded types. - if (fn->isIntrinsic()) + // overloaded types. 
Lgc dialects also require exact name for similar reason. + if (fn->isIntrinsic() || fn->getName().starts_with("lgc.")) return fn->getName().str(); } return (Twine(gv.getName()) + ".cloned." + gv.getParent()->getName()).str(); @@ -242,7 +246,17 @@ iterator_range CompilerUtils::CrossModuleInliner::inlineCall // Copy code InlineFunctionInfo ifi; +#if !defined(LLVM_MAIN_REVISION) || LLVM_MAIN_REVISION >= 489715 + // calleeFunc is not from targetMod, check if we need to convert it. + bool shouldConvert = !calleeFunc->IsNewDbgInfoFormat && targetMod->IsNewDbgInfoFormat; + if (shouldConvert) + calleeFunc->convertToNewDbgValues(); +#endif auto res = InlineFunction(cb, ifi); +#if !defined(LLVM_MAIN_REVISION) || LLVM_MAIN_REVISION >= 489715 + if (shouldConvert) + calleeFunc->convertFromNewDbgValues(); +#endif if (!res.isSuccess()) report_fatal_error(Twine("Failed to inline ") + calleeFunc->getName() + ": " + res.getFailureReason()); @@ -325,6 +339,7 @@ CompilerUtils::CrossModuleInliner::inlineCall(IRBuilder<> &b, llvm::Function *ca result = fakeUse->getOperand(0); fakeUse->eraseFromParent(); } + return {result, newBBs}; } diff --git a/imported/llvm-dialects b/imported/llvm-dialects index 69e114f9d8..3f9e17f5f4 160000 --- a/imported/llvm-dialects +++ b/imported/llvm-dialects @@ -1 +1 @@ -Subproject commit 69e114f9d8863ab056cf0e2392d82daadd4b0b95 +Subproject commit 3f9e17f5f44e825de6450cc8a5aff7f3d5a32ef2 diff --git a/include/gpurt-compiler.h b/include/gpurt-compiler.h index eeaa71b26a..f7912ea91f 100644 --- a/include/gpurt-compiler.h +++ b/include/gpurt-compiler.h @@ -32,8 +32,6 @@ #pragma once -#define MAKE_GPURT_VERSION(MAJOR, MINOR) ((MAJOR << 16) | MINOR) - namespace GpuRt { #pragma pack(push, 4) diff --git a/include/vkgcDefs.h b/include/vkgcDefs.h index 3c5d92f5ed..8be6f50d83 100644 --- a/include/vkgcDefs.h +++ b/include/vkgcDefs.h @@ -451,7 +451,7 @@ struct PipelineOptions { ///< depending the values of tessellation factors. 
bool enableInterpModePatch; ///< If set, per-sample interpolation for nonperspective and smooth input is enabled bool pageMigrationEnabled; ///< If set, page migration is enabled - uint32_t optimizationLevel; ///< The higher the number the more optimizations will be performed. Valid values are + unsigned optimizationLevel; ///< The higher the number the more optimizations will be performed. Valid values are ///< between 0 and 3. unsigned overrideThreadGroupSizeX; ///< Override value for ThreadGroupSizeX unsigned overrideThreadGroupSizeY; ///< Override value for ThreadGroupSizeY @@ -517,21 +517,21 @@ struct ShaderModuleEntryData { /// Represents the shader resources struct ResourcesNodes { ResourceNodeData *pInputInfo; - uint32_t inputInfoCount; + unsigned inputInfoCount; ResourceNodeData *pOutputInfo; - uint32_t outputInfoCount; + unsigned outputInfoCount; ResourceNodeData *pUniformBufferInfo; - uint32_t uniformBufferInfoCount; + unsigned uniformBufferInfoCount; ResourceNodeData *pShaderStorageInfo; - uint32_t shaderStorageInfoCount; + unsigned shaderStorageInfoCount; ResourceNodeData *pTexturesInfo; - uint32_t textureInfoCount; + unsigned textureInfoCount; ResourceNodeData *pImagesInfo; - uint32_t imageInfoCount; + unsigned imageInfoCount; ResourceNodeData *pAtomicCounterInfo; - uint32_t atomicCounterInfoCount; + unsigned atomicCounterInfoCount; ResourceNodeData *pDefaultUniformInfo; - uint32_t defaultUniformInfoCount; + unsigned defaultUniformInfoCount; }; /// Represents usage info of a shader module @@ -567,6 +567,7 @@ struct ShaderModuleUsage { unsigned localSizeY; ///< Compute shader work-group size in the Y dimension unsigned localSizeZ; ///< Compute shader work-group size in the Z dimension bool useBarycentric; ///< Whether to use gl_BarycentricXX or pervertexEXT decoration + bool disableDualSource; ///< Whether disable dualSource blend }; /// Represents common part of shader module data @@ -889,33 +890,33 @@ struct SamplerYCbCrConversionMetaData { /// 
Represents assistant info for each vertex attribute in uber fetch shader struct UberFetchShaderAttribInfo { - uint32_t binding : 8; ///< Attribute binding in vertex buffer table - uint32_t perInstance : 1; ///< Whether vertex input rate is per-instance - uint32_t isCurrent : 1; ///< Whether it is a current attribute - uint32_t isPacked : 1; ///< Whether it is a packed format - uint32_t isFixed : 1; ///< Whether it is a fixed format - uint32_t componentSize : 4; ///< Byte size per component - uint32_t componentMask : 4; ///< Component mask of this attribute. - uint32_t isBgra : 1; ///< Whether is BGRA format - uint32_t reserved : 11; ///< reserved bits in DWORD 0 - uint32_t offset; ///< Attribute offset - uint32_t instanceDivisor; ///< Reciprocal of instance divisor - uint32_t bufferFormat; ///< Buffer format info. it is a copy of buffer SRD DWORD3. + unsigned binding : 8; ///< Attribute binding in vertex buffer table + unsigned perInstance : 1; ///< Whether vertex input rate is per-instance + unsigned isCurrent : 1; ///< Whether it is a current attribute + unsigned isPacked : 1; ///< Whether it is a packed format + unsigned isFixed : 1; ///< Whether it is a fixed format + unsigned componentSize : 4; ///< Byte size per component + unsigned componentMask : 4; ///< Component mask of this attribute. + unsigned isBgra : 1; ///< Whether is BGRA format + unsigned reserved : 11; ///< reserved bits in DWORD 0 + unsigned offset; ///< Attribute offset + unsigned instanceDivisor; ///< Reciprocal of instance divisor + unsigned bufferFormat; ///< Buffer format info. it is a copy of buffer SRD DWORD3. 
}; /// Represents the bit field info of struct BilUberFetchShaderAttribInfo -constexpr uint32_t UberFetchShaderAttribMaskBinding = 0x00000FFu; -constexpr uint32_t UberFetchShaderAttribMaskPerInstance = 0x0000100u; -constexpr uint32_t UberFetchShaderAttribMaskIsCurrent = 0x0000200u; -constexpr uint32_t UberFetchShaderAttribMaskIsPacked = 0x0000400u; -constexpr uint32_t UberFetchShaderAttribMaskIsFixed = 0x0000800u; -constexpr uint32_t UberFetchShaderAttribMaskComponentSize = 0x000F000u; -constexpr uint32_t UberFetchShaderAttribShiftComponentSize = 12u; -constexpr uint32_t UberFetchShaderAttribMaskComponent0 = 0x0010000u; -constexpr uint32_t UberFetchShaderAttribMaskComponent1 = 0x0020000u; -constexpr uint32_t UberFetchShaderAttribMaskComponent2 = 0x0040000u; -constexpr uint32_t UberFetchShaderAttribMaskComponent3 = 0x0080000u; -constexpr uint32_t UberFetchShaderAttribMaskIsBgra = 0x0100000u; +constexpr unsigned UberFetchShaderAttribMaskBinding = 0x00000FFu; +constexpr unsigned UberFetchShaderAttribMaskPerInstance = 0x0000100u; +constexpr unsigned UberFetchShaderAttribMaskIsCurrent = 0x0000200u; +constexpr unsigned UberFetchShaderAttribMaskIsPacked = 0x0000400u; +constexpr unsigned UberFetchShaderAttribMaskIsFixed = 0x0000800u; +constexpr unsigned UberFetchShaderAttribMaskComponentSize = 0x000F000u; +constexpr unsigned UberFetchShaderAttribShiftComponentSize = 12u; +constexpr unsigned UberFetchShaderAttribMaskComponent0 = 0x0010000u; +constexpr unsigned UberFetchShaderAttribMaskComponent1 = 0x0020000u; +constexpr unsigned UberFetchShaderAttribMaskComponent2 = 0x0040000u; +constexpr unsigned UberFetchShaderAttribMaskComponent3 = 0x0080000u; +constexpr unsigned UberFetchShaderAttribMaskIsBgra = 0x0100000u; /// Represents the bit field info of struct BilUberFetchShaderAttribInfo @@ -974,7 +975,7 @@ struct BvhShaderResourceDescriptor { }; // Corresponds to gl_RayFlags* in GLSL_EXT_ray_tracing.txt -enum RayTracingRayFlag : uint32_t { +enum RayTracingRayFlag : unsigned { 
RayTracingRayFlagNone = 0x00, // gl_RayFlagsNoneEXT RayTracingRayFlagForceOpaque = 0x01, // gl_RayFlagsOpaqueEXT RayTracingRayFlagForceNonOpaque = 0x02, // gl_RayFlagsNoOpaqueEXT @@ -1130,8 +1131,8 @@ struct RtState { }; struct UniformConstantMapEntry { - uint32_t location; ///< Starting location of the uniform constant variable - uint32_t offset; ///< Offset of the uniform constant variable in the final buffer + unsigned location; ///< Starting location of the uniform constant variable + unsigned offset; ///< Offset of the uniform constant variable in the final buffer }; struct UniformConstantMap { @@ -1250,15 +1251,28 @@ struct GraphicsPipelineBuildInfo { #if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 62 BinaryData shaderLibrary; ///< SPIR-V library binary data #endif - RtState rtState; ///< Ray tracing state - bool originUpperLeft; ///< Whether origin coordinate of framebuffer is upper-left. - const void *pClientMetadata; ///< Pointer to (optional) client-defined data to be stored inside the ELF - size_t clientMetadataSize; ///< Size (in bytes) of the client-defined data - unsigned numUniformConstantMaps; ///< Number of uniform constant maps - UniformConstantMap **ppUniformMaps; ///< Pointers to array of pointers for the uniform constant map. - ApiXfbOutData apiXfbOutData; ///< Transform feedback data specified by API interface. - bool vbAddressLowBitsKnown; ///< Whether vbAddressLowBits is valid + RtState rtState; ///< Ray tracing state +#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 71 + bool originUpperLeft; ///< Whether origin coordinate of framebuffer is upper-left. + unsigned numUniformConstantMaps; ///< Number of uniform constant maps + UniformConstantMap **ppUniformMaps; ///< Pointers to array of pointers for the uniform constant map. + ApiXfbOutData apiXfbOutData; ///< Transform feedback data specified by API interface. 
+ bool vbAddressLowBitsKnown; ///< Whether vbAddressLowBits is valid uint8_t vbAddressLowBits[MaxVertexBindings]; ///< Lowest two bits of vertex buffer addresses + const auto &getGlState() const { return *this; } +#else + struct { + bool originUpperLeft; ///< Whether origin coordinate of framebuffer is upper-left. + unsigned numUniformConstantMaps; ///< Number of uniform constant maps + UniformConstantMap **ppUniformMaps; ///< Pointers to array of pointers for the uniform constant map. + ApiXfbOutData apiXfbOutData; ///< Transform feedback data specified by API interface. + bool vbAddressLowBitsKnown; ///< Whether vbAddressLowBits is valid + uint8_t vbAddressLowBits[MaxVertexBindings]; ///< Lowest two bits of vertex buffer addresses + } glState; + const auto &getGlState() const { return glState; } +#endif + const void *pClientMetadata; ///< Pointer to (optional) client-defined data to be stored inside the ELF + size_t clientMetadataSize; ///< Size (in bytes) of the client-defined data }; /// Represents info to build a compute pipeline. 
diff --git a/lgc/CMakeLists.txt b/lgc/CMakeLists.txt index 1535bf5b0e..b7a8cb97cc 100644 --- a/lgc/CMakeLists.txt +++ b/lgc/CMakeLists.txt @@ -53,7 +53,7 @@ add_llvm_library(LLVMlgc LINK_COMPONENTS Vectorize ) -llvm_map_components_to_libnames(extra_llvm_libs CompilerUtils Continuations) +llvm_map_components_to_libnames(extra_llvm_libs CompilerUtils Raytracing) target_link_libraries(LLVMlgc PUBLIC llvm_dialects ${extra_llvm_libs} llpc_version) ### Cached Project Options ############################################################################################# @@ -65,7 +65,11 @@ set_compiler_options(LLVMlgc ${LLPC_ENABLE_WERROR}) ### TableGen for LGC dialect ########################################################################################### -set(LGC_TABLEGEN_EXE $) +if (EXISTS ${LLVM_TOOLS_BINARY_PATH}/llvm-dialects-tblgen) + set(LGC_TABLEGEN_EXE ${LLVM_TOOLS_BINARY_PATH}/llvm-dialects-tblgen) +else() + set(LGC_TABLEGEN_EXE $) +endif() set(LGC_TABLEGEN_TARGET llvm-dialects-tblgen) set(LLVM_TARGET_DEFINITIONS interface/lgc/LgcDialect.td) @@ -204,6 +208,9 @@ target_sources(LLVMlgc PRIVATE util/StartStopTimer.cpp ) +# lgc/interface/lgc +target_sources(LLVMlgc PRIVATE interface/lgc/LgcDialect.td) + add_subdirectory(disassembler) add_subdirectory(tool/lgc) add_subdirectory(test) @@ -212,4 +219,4 @@ if (LLPC_BUILD_TESTS) add_subdirectory(unittests) endif() -target_link_libraries(LLVMlgc PRIVATE LLVMContinuations) +target_link_libraries(LLVMlgc PRIVATE LLVMRaytracing) diff --git a/lgc/builder/ArithBuilder.cpp b/lgc/builder/ArithBuilder.cpp index 186f2c1e63..14efa07468 100644 --- a/lgc/builder/ArithBuilder.cpp +++ b/lgc/builder/ArithBuilder.cpp @@ -836,8 +836,8 @@ Value *BuilderImpl::CreateNormalizeVector(Value *x, const Twine &instName) { Value *result = nullptr; if (x->getType()->getScalarType()->isFloatTy()) { // Make sure a FP32 zero vector is normalized to a FP32 zero vector, rather than NaNs. 
- if (!getFastMathFlags().noSignedZeros() || !getFastMathFlags().noInfs() || !getFastMathFlags().noNaNs()) { - // When NSZ, NoInfs, or NoNaNs is not specified, we avoid using fmul_legacy since it is not IEEE compliant. + if (!getFastMathFlags().noSignedZeros() || !getFastMathFlags().noNaNs()) { + // When NSZ or NoNaNs is not specified, we avoid using fmul_legacy since it is not IEEE compliant. auto zero = ConstantFP::get(getFloatTy(), 0.0); auto isZeroDot = CreateFCmpOEQ(dot, zero); rsq = CreateSelect(isZeroDot, zero, rsq); @@ -930,18 +930,17 @@ Value *BuilderImpl::CreateRefract(Value *i, Value *n, Value *eta, const Twine &i // @param maxVal : Maximum of clamp range // @param instName : Name to give instruction(s) Value *BuilderImpl::CreateFClamp(Value *x, Value *minVal, Value *maxVal, const Twine &instName) { - // For float, and for half on GFX9+, we can use the fmed3 instruction. + // For float and half, we can use the fmed3 instruction. // But we can only do this if we do not need NaN preservation. Value *result = nullptr; - if (getFastMathFlags().noNaNs() && (x->getType()->getScalarType()->isFloatTy() || - (getPipelineState()->getTargetInfo().getGfxIpVersion().major >= 9 && - x->getType()->getScalarType()->isHalfTy()))) { + if (getFastMathFlags().noNaNs() && + (x->getType()->getScalarType()->isFloatTy() || x->getType()->getScalarType()->isHalfTy())) { result = scalarize(x, minVal, maxVal, [this](Value *x, Value *minVal, Value *maxVal) { return CreateIntrinsic(Intrinsic::amdgcn_fmed3, x->getType(), {x, minVal, maxVal}); }); result->setName(instName); } else { - // For half on GFX8 or earlier, or for double, use a combination of fmin and fmax. + // For double, use a combination of fmin and fmax. 
CallInst *max = CreateMaxNum(x, minVal); max->setFastMathFlags(getFastMathFlags()); CallInst *min = CreateMinNum(max, maxVal, instName); @@ -1037,17 +1036,16 @@ Value *BuilderImpl::CreateFMax3(Value *value1, Value *value2, Value *value3, con // @param value3 : Third value // @param instName : Name to give instruction(s) Value *BuilderImpl::CreateFMid3(Value *value1, Value *value2, Value *value3, const Twine &instName) { - // For float, and for half on GFX9+, we can use the fmed3 instruction. + // For float and half, we can use the fmed3 instruction. // But we can only do this if we do not need NaN preservation. Value *result = nullptr; - if (getFastMathFlags().noNaNs() && (value1->getType()->getScalarType()->isFloatTy() || - (getPipelineState()->getTargetInfo().getGfxIpVersion().major >= 9 && - value1->getType()->getScalarType()->isHalfTy()))) { + if (getFastMathFlags().noNaNs() && + (value1->getType()->getScalarType()->isFloatTy() || value1->getType()->getScalarType()->isHalfTy())) { result = scalarize(value1, value2, value3, [this](Value *value1, Value *value2, Value *value3) { return CreateIntrinsic(Intrinsic::amdgcn_fmed3, value1->getType(), {value1, value2, value3}); }); } else { - // For half on GFX8 or earlier, use a combination of fmin and fmax. + // For double, use a combination of fmin and fmax. CallInst *min1 = CreateMinNum(value1, value2); min1->setFastMathFlags(getFastMathFlags()); CallInst *max1 = CreateMaxNum(value1, value2); diff --git a/lgc/builder/Builder.cpp b/lgc/builder/Builder.cpp index 5094aae178..e2312194f6 100644 --- a/lgc/builder/Builder.cpp +++ b/lgc/builder/Builder.cpp @@ -207,6 +207,16 @@ Constant *BuilderCommon::getFpConstant(Type *ty, APFloat value) { return ConstantFP::get(ty, value); } +// ===================================================================================================================== +// Create alloca for given input type. +// +// @param ty : pointer type. 
+Value *BuilderCommon::CreateAllocaAtFuncEntry(Type *ty) { + IRBuilderBase::InsertPointGuard ipg(*this); + SetInsertPointPastAllocas(GetInsertBlock()->getParent()); + return CreateAlloca(ty); +} + // ===================================================================================================================== // Get a constant of FP or vector of FP type for the value PI/180, for converting radians to degrees. // diff --git a/lgc/builder/ImageBuilder.cpp b/lgc/builder/ImageBuilder.cpp index ee2583e446..6dbc21e119 100644 --- a/lgc/builder/ImageBuilder.cpp +++ b/lgc/builder/ImageBuilder.cpp @@ -427,7 +427,6 @@ Value *BuilderImpl::CreateImageLoad(Type *resultTy, unsigned dim, unsigned flags getPipelineState()->getShaderResourceUsage(m_shaderStage.value())->useImages = true; getPipelineState()->getShaderResourceUsage(m_shaderStage.value())->resourceRead = true; assert(coord->getType()->getScalarType()->isIntegerTy(32)); - imageDesc = patchCubeDescriptor(imageDesc, dim); coord = handleFragCoordViewIndex(coord, flags, dim); unsigned dmask = 1; @@ -630,7 +629,6 @@ Value *BuilderImpl::CreateImageStore(Value *texel, unsigned dim, unsigned flags, // Mark usage of images, to allow the compute workgroup reconfiguration optimization. getPipelineState()->getShaderResourceUsage(m_shaderStage.value())->resourceWrite = true; assert(coord->getType()->getScalarType()->isIntegerTy(32)); - imageDesc = patchCubeDescriptor(imageDesc, dim); coord = handleFragCoordViewIndex(coord, flags, dim); // For 64-bit texel, only the first component is stored @@ -1142,7 +1140,6 @@ Value *BuilderImpl::CreateImageAtomicCommon(unsigned atomicOp, unsigned dim, uns unsigned imageDescArgIndex = 0; if (imageDesc->getType() == getDescTy(ResourceNodeType::DescriptorResource)) { // Resource descriptor. Use the image atomic instruction. 
- imageDesc = patchCubeDescriptor(imageDesc, dim); args.push_back(inputValue); if (atomicOp == AtomicOpCompareSwap) args.push_back(comparatorValue); @@ -1732,53 +1729,6 @@ void BuilderImpl::combineCubeArrayFaceAndSlice(Value *coord, SmallVectorImplgetTargetInfo().getGfxIpVersion().major >= 9) - return desc; - - // Extract the depth. - Value *elem4 = CreateExtractElement(desc, 4); - Value *depth = CreateAnd(elem4, getInt32(0x1FFF)); - - // Change to depth * 6 + 5 - depth = CreateMul(depth, getInt32(6)); - depth = CreateAdd(depth, getInt32(5)); - elem4 = CreateAnd(elem4, getInt32(0xFFFFE000)); - elem4 = CreateOr(elem4, depth); - - // Change resource type to 2D array (0xD) - Value *originalElem3 = CreateExtractElement(desc, 3); - Value *elem3 = originalElem3; - elem3 = CreateAnd(elem3, getInt32(0x0FFFFFFF)); - elem3 = CreateOr(elem3, getInt32(0xD0000000)); - - // If allowNullDescriptor is on and image descriptor is a null descriptor, keep elem3 and elem4 be zero - if (m_pipelineState->getOptions().allowNullDescriptor) { - if (m_pipelineState->getOptions().maskOffNullDescriptorTypeField) { - GfxIpVersion gfxIp = getPipelineState()->getTargetInfo().getGfxIpVersion(); - SqImgRsrcRegHandler proxySqRsrcRegHelper(this, desc, &gfxIp); - unsigned typeMask = proxySqRsrcRegHelper.getRegMask(SqRsrcRegs::Type); - // Mask off the type bits for the null descriptor - originalElem3 = CreateAnd(originalElem3, getInt32(~typeMask)); - } - // Check dword3 against 0 for a null descriptor - Value *zero = getInt32(0); - Value *isNullDesc = CreateICmpEQ(originalElem3, zero); - elem3 = CreateSelect(isNullDesc, zero, elem3); - elem4 = CreateSelect(isNullDesc, zero, elem4); - } - - // Reassemble descriptor. 
- desc = CreateInsertElement(desc, elem4, 4); - desc = CreateInsertElement(desc, elem3, 3); - return desc; -} - // ===================================================================================================================== // Handle cases where we need to add the FragCoord x,y to the coordinate, and use ViewIndex as the z coordinate. // diff --git a/lgc/builder/InOutBuilder.cpp b/lgc/builder/InOutBuilder.cpp index f73fbf2bf2..e7a34f23d3 100644 --- a/lgc/builder/InOutBuilder.cpp +++ b/lgc/builder/InOutBuilder.cpp @@ -93,10 +93,9 @@ Value *BuilderImpl::CreateReadPerVertexInput(Type *resultTy, unsigned location, assert(m_shaderStage == ShaderStage::Fragment); // Fold constant locationOffset into location. - assert(isa(locationOffset)); - location += cast(locationOffset)->getZExtValue(); - locationOffset = getInt32(0); - locationCount = divideCeil(resultTy->getPrimitiveSizeInBits(), 128); + bool canFold = foldConstantLocationOffset(resultTy, location, locationOffset, elemIdx, locationCount, inputInfo); + assert(canFold); + (void(canFold)); // Unused // Mark the usage of the input/output. markGenericInputOutputUsage(false, location, locationCount, inputInfo, vertexIndex != nullptr); @@ -179,21 +178,15 @@ Value *BuilderImpl::readGenericInputOutput(bool isOutput, Type *resultTy, unsign assert(resultTy->isAggregateType() == false); assert(isOutput == false || m_shaderStage == ShaderStage::TessControl); - // Fold constant locationOffset into location. (Currently a variable locationOffset is only supported in - // TCS, TES, mesh shader, and FS custom interpolation.) - bool directlyMapLocations = true; - if (auto constLocOffset = dyn_cast(locationOffset)) { - location += constLocOffset->getZExtValue(); - locationOffset = getInt32(0); - locationCount = (resultTy->getPrimitiveSizeInBits() + 127U) / 128U; - directlyMapLocations = false; // Reset this flag if dynamic location indexing is avoided - } + // Fold constant locationOffset into location. 
+ bool directlyMapLocations = + !foldConstantLocationOffset(resultTy, location, locationOffset, elemIdx, locationCount, inOutInfo); // Mark the usage of the input/output. markGenericInputOutputUsage(isOutput, location, locationCount, inOutInfo, vertexIndex != nullptr, directlyMapLocations); - // Generate LLPC call for reading the input/output. + // Generate the call for reading the input/output. Value *result = nullptr; switch (m_shaderStage.value()) { case ShaderStage::Vertex: { @@ -267,21 +260,15 @@ Instruction *BuilderImpl::CreateWriteGenericOutput(Value *valueToWrite, unsigned Value *vertexOrPrimitiveIndex) { assert(valueToWrite->getType()->isAggregateType() == false); - // Fold constant locationOffset into location (Currently a variable locationOffset is only supported in - // TCS or mesh shader). - bool directlyMapLocations = true; - if (auto constLocOffset = dyn_cast(locationOffset)) { - location += constLocOffset->getZExtValue(); - locationOffset = getInt32(0); - locationCount = (valueToWrite->getType()->getPrimitiveSizeInBits() + 127U) / 128U; - directlyMapLocations = false; // Reset this flag if dynamic location indexing is avoided - } + // Fold constant locationOffset into location. + bool directlyMapLocations = !foldConstantLocationOffset(valueToWrite->getType(), location, locationOffset, elemIdx, + locationCount, outputInfo); // Mark the usage of the output. markGenericInputOutputUsage(/*isOutput=*/true, location, locationCount, outputInfo, vertexOrPrimitiveIndex != nullptr, directlyMapLocations); - // Set up the args for the llpc call. + // Set up the args for the call writing the output. SmallVector args; switch (m_shaderStage.value()) { case ShaderStage::Vertex: @@ -424,17 +411,54 @@ void BuilderImpl::markGenericInputOutputUsage(bool isOutput, unsigned location, } // Add location map entries for this input/output + + // NOTE: For TCS input/output, TES input, and mesh shader output, their components could be separately indexed. 
+ // We have to reserve all components in the location map and mark all of them as active. Otherwise, this might + // lead to failed searching when we try to find the location map info for this input/output. + bool reserveAllComponents = m_shaderStage == ShaderStage::TessControl || + (m_shaderStage == ShaderStage::TessEval && !isOutput) || + (m_shaderStage == ShaderStage::Mesh && isOutput); + for (unsigned i = 0; i < locationCount; ++i) { - InOutLocationInfo origLocationInfo; - origLocationInfo.setLocation(location + i); - origLocationInfo.setComponent(inOutInfo.getComponent()); - auto &newLocationInfo = (*inOutLocInfoMap)[origLocationInfo]; - if (directlyMapLocations) { - // Directly map the locations (trivial map) without further calculation - newLocationInfo.setLocation(location + i); - newLocationInfo.setComponent(inOutInfo.getComponent()); - } else - newLocationInfo.setData(InvalidValue); + if (reserveAllComponents) { + unsigned numComponents = 0; + if (inOutInfo.getNumComponents() > 4) { + assert(locationCount % 2 == 0); // Must have even number of locations for 64-bit data type + assert(inOutInfo.getComponent() == 0); // Start component must be 0 in this case + if (i % 2 == 0) + numComponents = 4; + else + numComponents = inOutInfo.getNumComponents() - 4; + } else { + numComponents = inOutInfo.getComponent() + inOutInfo.getNumComponents(); + } + assert(numComponents >= 1 && numComponents <= 4); // Valid number of components for a location is 1~4 + + for (unsigned j = 0; j < numComponents; ++j) { + InOutLocationInfo origLocationInfo; + origLocationInfo.setLocation(location + i); + origLocationInfo.setComponent(j); + + auto &newLocationInfo = (*inOutLocInfoMap)[origLocationInfo]; + if (directlyMapLocations) { + // Force to map the location trivially + newLocationInfo.setLocation(location + i); + newLocationInfo.setComponent(j); + } else + newLocationInfo.setData(InvalidValue); + } + } else { + InOutLocationInfo origLocationInfo; + 
origLocationInfo.setLocation(location + i); + origLocationInfo.setComponent(inOutInfo.getComponent()); + auto &newLocationInfo = (*inOutLocInfoMap)[origLocationInfo]; + if (directlyMapLocations) { + // Directly map the locations (trivial map) without further calculation + newLocationInfo.setLocation(location + i); + newLocationInfo.setComponent(inOutInfo.getComponent()); + } else + newLocationInfo.setData(InvalidValue); + } } } @@ -583,6 +607,59 @@ void BuilderImpl::markFsOutputType(Type *outputTy, unsigned location, InOutInfo resUsage->inOutUsage.fs.outputTypes[location] = basicTy; } +// ===================================================================================================================== +// Try to fold constant location offset if possible. This function also updates the field 'numComponents' of 'inOutInfo' +// if it is not specified by computing an appropriate value. +// +// @param inOutTy : Type of this input/output +// @param [in/out] location : Base location of this input/output +// @param [in/out] locationOffset : Variable location offset; must be within locationCount +// @param elemIdx : Element index in vector. (This is the SPIR-V "component", except that it is half the component for +// 64-bit elements.) +// @param [out] locationCount : Count of locations taken by this input/output +// @param [in/out] inOutInfo : Extra input/output info +// @returns : True if we can successfully fold the constant location offset. +bool BuilderImpl::foldConstantLocationOffset(Type *inOutTy, unsigned &location, Value *&locationOffset, Value *elemIdx, + unsigned &locationCount, InOutInfo &inOutInfo) { + // First, compute 'numComponents' if not specified + if (inOutInfo.getNumComponents() == 0) { + // Get the initial value from the type of this input/output. + unsigned numComponents = inOutTy->isVectorTy() ? cast(inOutTy)->getNumElements() : 1; + + // Then consider component indexing like this vecN[i]. 
+ assert(isa(elemIdx)); // For dynamic component indexing, NumComponents must be specified by frontend. + unsigned componentIndex = cast(elemIdx)->getZExtValue(); + // Take component offset into account. The provided component index actually includes this value. We must subtract + // it to get real component index. + if (inOutTy->getScalarSizeInBits() == 64) { + assert(inOutInfo.getComponent() % 2 == 0); // Must be even + componentIndex -= inOutInfo.getComponent() / 2; + } else { + componentIndex -= inOutInfo.getComponent(); + } + numComponents = std::max(numComponents, componentIndex + 1); + + // For 64-bit data types, vector element or scalar is considered to occupy two components. Revise it. + if (inOutTy->getScalarSizeInBits() == 64) + numComponents *= 2; + + inOutInfo.setNumComponents(numComponents); + } + + // Then, try to fold constant locationOffset into location. Currently, a variable locationOffset is only supported in + // TCS, TES, mesh shader, and FS custom interpolation. + if (!isa(locationOffset)) + return false; + + const unsigned constantLocationOffset = cast(locationOffset)->getZExtValue(); + location += constantLocationOffset; + locationOffset = getInt32(0); + assert(inOutInfo.getNumComponents() >= 1); + locationCount = divideCeil(inOutInfo.getNumComponents(), 4); + + return true; +} + // ===================================================================================================================== // Get the mode and interp value for an FS "interpolated" (per-vertex attribute) read. 
// diff --git a/lgc/builder/MatrixBuilder.cpp b/lgc/builder/MatrixBuilder.cpp index 1d3e252f0d..dc4a3ff04f 100644 --- a/lgc/builder/MatrixBuilder.cpp +++ b/lgc/builder/MatrixBuilder.cpp @@ -28,6 +28,7 @@ * @brief LLPC source file: implementation of matrix Builder methods *********************************************************************************************************************** */ +#include "lgc/LgcDialect.h" #include "lgc/builder/BuilderImpl.h" #define DEBUG_TYPE "lgc-builder-impl-matrix" @@ -351,16 +352,16 @@ Value *BuilderImpl::CreateMatrixInverse(Value *const matrix, const Twine &instNa // @returns the corresponding LLVM type Type *BuilderCommon::transCooperativeMatrixElementType(CooperativeMatrixElementType elemType) { switch (elemType) { - case BuilderCommon::CooperativeMatrixElementType::Float16: - case BuilderCommon::CooperativeMatrixElementType::Float16Packed: + case CooperativeMatrixElementType::Float16: + case CooperativeMatrixElementType::Float16Packed: return getHalfTy(); - case BuilderCommon::CooperativeMatrixElementType::Float32: + case CooperativeMatrixElementType::Float32: return getFloatTy(); - case BuilderCommon::CooperativeMatrixElementType::Int16: + case CooperativeMatrixElementType::Int16: return getInt16Ty(); - case BuilderCommon::CooperativeMatrixElementType::Int32: + case CooperativeMatrixElementType::Int32: return getInt32Ty(); - case BuilderCommon::CooperativeMatrixElementType::Int8: + case CooperativeMatrixElementType::Int8: return getInt8Ty(); default: llvm_unreachable("The element type is not supported."); @@ -427,7 +428,8 @@ Value *BuilderCommon::CreateCooperativeMatrixExtract(Value *matrix, Value *index std::string callName(lgcName::CooperativeMatrixExtract); addTypeMangling(resultTy, args, callName); Value *result = - CreateNamedCall(callName, resultTy, args, {Attribute::ReadNone, Attribute::Speculatable, Attribute::WillReturn}); + CreateNamedCall(callName, resultTy, args, + {Attribute::ReadNone, Attribute::Convergent, 
Attribute::Speculatable, Attribute::WillReturn}); result->setName(instName); return result; } @@ -453,7 +455,8 @@ Value *BuilderCommon::CreateCooperativeMatrixInsert(Value *matrix, Value *value, std::string callName(lgcName::CooperativeMatrixInsert); addTypeMangling(resultTy, args, callName); Value *result = - CreateNamedCall(callName, resultTy, args, {Attribute::ReadNone, Attribute::Speculatable, Attribute::WillReturn}); + CreateNamedCall(callName, resultTy, args, + {Attribute::ReadNone, Attribute::Convergent, Attribute::Speculatable, Attribute::WillReturn}); result->setName(instName); return result; } @@ -472,7 +475,8 @@ Value *BuilderCommon::CreateCooperativeMatrixFill(Value *value, CooperativeMatri std::string callName(lgcName::CooperativeMatrixFill); addTypeMangling(resultTy, args, callName); Value *result = - CreateNamedCall(callName, resultTy, args, {Attribute::ReadNone, Attribute::Speculatable, Attribute::WillReturn}); + CreateNamedCall(callName, resultTy, args, + {Attribute::ReadNone, Attribute::Convergent, Attribute::Speculatable, Attribute::WillReturn}); result->setName(instName); return result; } @@ -492,10 +496,11 @@ Value *BuilderCommon::CreateCooperativeMatrixFill(Value *value, CooperativeMatri // @param elemType : Element type for the matrix. // @param layout : Identify whether it's A/B or C/D // @param memoryAccess : Parsed from memory operation. +// @param alignment : Alignment for memory operation. // @param instName : Name to give instruction(s). 
Value *BuilderCommon::CreateCooperativeMatrixLoad(Value *pointer, Value *stride, bool colMajor, CooperativeMatrixElementType elemType, CooperativeMatrixLayout layout, - unsigned memoryAccess, const Twine &instName) { + unsigned memoryAccess, Align alignment, const Twine &instName) { Type *resultTy = getCooperativeMatrixTy(elemType, layout); std::string callName(lgcName::CooperativeMatrixLoad); Value *args[] = {pointer, @@ -503,9 +508,10 @@ Value *BuilderCommon::CreateCooperativeMatrixLoad(Value *pointer, Value *stride, getInt1(colMajor), getInt32(static_cast(elemType)), getInt32(static_cast(layout)), - getInt32(memoryAccess)}; + getInt32(memoryAccess), + getInt32(alignment.value())}; addTypeMangling(resultTy, args, callName); - Value *loadVal = CreateNamedCall(callName, resultTy, args, {Attribute::ReadOnly}); + Value *loadVal = CreateNamedCall(callName, resultTy, args, {Attribute::ReadOnly, Attribute::Convergent}); loadVal->setName(instName); return loadVal; } @@ -526,11 +532,12 @@ Value *BuilderCommon::CreateCooperativeMatrixLoad(Value *pointer, Value *stride, // @param elemType : Element type for the matrix. // @param layout : Identify the matrix type(A/B or C). // @param memoryAccess : Memoray operands +// @param alignment : Alignment for memory operation. // @param instName : Name to give instruction(s). 
Value *BuilderCommon::CreateCooperativeMatrixStore(Value *pointer, Value *matrix, Value *stride, bool colMajor, CooperativeMatrixElementType elemType, CooperativeMatrixLayout layout, unsigned memoryAccess, - const Twine &instName) { + Align alignment, const Twine &instName) { assert(matrix->getType() == getCooperativeMatrixTy(elemType, layout)); std::string callName(lgcName::CooperativeMatrixStore); @@ -540,11 +547,12 @@ Value *BuilderCommon::CreateCooperativeMatrixStore(Value *pointer, Value *matrix getInt32(static_cast(elemType)), getInt32(static_cast(layout)), getInt32(memoryAccess), + getInt32(alignment.value()), matrix}; addTypeMangling(Type::getVoidTy(getContext()), args, callName); - Value *storeVal = - CreateNamedCall(callName, Type::getVoidTy(getContext()), args, {Attribute::WriteOnly, Attribute::WillReturn}); + Value *storeVal = CreateNamedCall(callName, Type::getVoidTy(getContext()), args, + {Attribute::WriteOnly, Attribute::Convergent, Attribute::WillReturn}); storeVal->setName(instName); return nullptr; } @@ -573,7 +581,8 @@ CallInst *BuilderCommon::CreateCooperativeMatrixConvert(CastInst::CastOps castOp std::string callName(lgcName::CooperativeMatrixConvert); addTypeMangling(resultTy, args, callName); - CallInst *dstElems = CreateNamedCall(callName, resultTy, args, {Attribute::ReadOnly, Attribute::WillReturn}); + CallInst *dstElems = + CreateNamedCall(callName, resultTy, args, {Attribute::ReadNone, Attribute::Convergent, Attribute::WillReturn}); dstElems->setName(instName); return dstElems; } @@ -597,7 +606,8 @@ Value *BuilderCommon::CreateCooperativeMatrixBinaryOp(CooperativeMatrixArithOp c getInt32(static_cast(layout))}; addTypeMangling(rhs->getType(), args, callName); - Value *result = CreateNamedCall(callName, rhs->getType(), args, {Attribute::ReadOnly, Attribute::WillReturn}); + Value *result = CreateNamedCall(callName, rhs->getType(), args, + {Attribute::ReadNone, Attribute::Convergent, Attribute::WillReturn}); result->setName(instName); return 
result; } @@ -622,7 +632,8 @@ Value *BuilderCommon::CreateCoopMatrixTimesScalar(Value *matrix, Value *scalar, Value *args[] = {matrix, scalar, getInt32(static_cast(elemType)), getInt32(static_cast(layout))}; addTypeMangling(matrix->getType(), args, callName); - Value *result = CreateNamedCall(callName, matrix->getType(), args, {Attribute::ReadOnly, Attribute::WillReturn}); + Value *result = CreateNamedCall(callName, matrix->getType(), args, + {Attribute::ReadNone, Attribute::Convergent, Attribute::WillReturn}); result->setName(instName); return result; } @@ -642,7 +653,8 @@ CallInst *BuilderCommon::CreateCooperativeMatrixTranspose(llvm::Value *matrix, C Value *args[] = {matrix, getInt32(static_cast(elemType)), getInt32(static_cast(layout))}; addTypeMangling(matrix->getType(), args, callName); - CallInst *result = CreateNamedCall(callName, matrix->getType(), args, {Attribute::ReadOnly, Attribute::WillReturn}); + CallInst *result = CreateNamedCall(callName, matrix->getType(), args, + {Attribute::ReadNone, Attribute::Convergent, Attribute::WillReturn}); result->setName(instName); return result; } @@ -678,7 +690,8 @@ Value *BuilderCommon::CreateCooperativeMatrixMulAdd(llvm::Value *matrixA, llvm:: getInt32(static_cast(factorElemType))}; addTypeMangling(matrixC->getType(), args, callName); - Value *result = CreateNamedCall(callName, matrixC->getType(), args, {Attribute::ReadOnly, Attribute::WillReturn}); + Value *result = CreateNamedCall(callName, matrixC->getType(), args, + {Attribute::ReadNone, Attribute::Convergent, Attribute::WillReturn}); result->setName(instName); return result; } diff --git a/lgc/builder/SubgroupBuilder.cpp b/lgc/builder/SubgroupBuilder.cpp index 28e3943a61..84b224e591 100644 --- a/lgc/builder/SubgroupBuilder.cpp +++ b/lgc/builder/SubgroupBuilder.cpp @@ -28,7 +28,7 @@ * @brief LLPC source file: implementation of subgroup Builder methods 
*********************************************************************************************************************** */ -#include "lgc/builder/BuilderImpl.h" +#include "lgc/builder/SubgroupBuilder.h" #include "lgc/state/PipelineState.h" #include "lgc/util/Internal.h" #include "llvm/IR/Intrinsics.h" @@ -74,6 +74,14 @@ unsigned BuilderImpl::getShaderWaveSize() { return getPipelineState()->getShaderWaveSize(shaderStage.value()); } +// ===================================================================================================================== +// Create a subgroup elect call. +// +// @param instName : Name to give final instruction. +Value *SubgroupBuilder::CreateSubgroupElect(const Twine &instName) { + return CreateICmpEQ(CreateSubgroupMbcnt(createGroupBallot(getTrue()), ""), getInt32(0)); +} + // ===================================================================================================================== // Create a subgroup all call. // @@ -93,6 +101,25 @@ Value *BuilderImpl::CreateSubgroupAll(Value *const value, const Twine &instName) return result; } +// ===================================================================================================================== +// Create a subgroup any call. +// +// @param value : The value to compare across the subgroup. Must be an integer type. +// @param instName : Name to give final instruction. 
+Value *SubgroupBuilder::CreateSubgroupAny(Value *const value, const Twine &instName) { + Value *result = CreateICmpNE(createGroupBallot(value), getInt64(0)); + result = CreateSelect(CreateUnaryIntrinsic(Intrinsic::is_constant, value), value, result); + + // Helper invocations of whole quad mode should be included in the subgroup vote execution + const auto &fragmentMode = m_pipelineState->getShaderModes()->getFragmentShaderMode(); + if (m_shaderStage == ShaderStage::Fragment && !fragmentMode.waveOpsExcludeHelperLanes) { + result = CreateZExt(result, getInt32Ty()); + result = CreateIntrinsic(Intrinsic::amdgcn_softwqm, {getInt32Ty()}, {result}); + result = CreateTrunc(result, getInt1Ty()); + } + return result; +} + // ===================================================================================================================== // Create a subgroup all equal call. // diff --git a/lgc/elfLinker/ElfLinker.cpp b/lgc/elfLinker/ElfLinker.cpp index 07b8f0b802..57de76edcb 100644 --- a/lgc/elfLinker/ElfLinker.cpp +++ b/lgc/elfLinker/ElfLinker.cpp @@ -30,6 +30,7 @@ */ #include "lgc/ElfLinker.h" #include "ColorExportShader.h" +#include "ElfLinkerImpl.h" #include "GlueShader.h" #include "lgc/state/AbiMetadata.h" #include "lgc/state/PalMetadata.h" @@ -45,211 +46,18 @@ #define DEBUG_TYPE "lgc-elf-linker" -using namespace lgc; using namespace llvm; -namespace { - -class ElfLinkerImpl; - -// ===================================================================================================================== -// An ELF input to the linker -struct ElfInput { - std::unique_ptr objectFile; - SmallVector, 4> sectionMap; - StringRef reduceAlign; // If non-empty, the name of a text section to reduce the alignment to 0x40 -}; - -// ===================================================================================================================== -// A single input section -struct InputSection { - InputSection(object::SectionRef sectionRef) : sectionRef(sectionRef), 
size(sectionRef.getSize()) {} - object::SectionRef sectionRef; // Section from the input ELF - size_t offset = 0; // Offset within the output ELF section - uint64_t size; // Size, possibly after removing s_end_code padding -}; - -// ===================================================================================================================== -// A single output section -class OutputSection { -public: - // Constructor given name and optional SHT_* section type - OutputSection(ElfLinkerImpl *linker, StringRef name = "", unsigned type = 0) - : m_linker(linker), m_name(name), m_type(type) {} - - // Add an input section - void addInputSection(ElfInput &elfInput, object::SectionRef inputSection, bool reduceAlign = false); - - // Get name of output section - StringRef getName(); - - // Get the section index in the output file - unsigned getIndex(); - - // Set the layout of this output section, allowing for alignment required by input sections. - void layout(); - - // Add a symbol to the output symbol table - void addSymbol(const object::ELFSymbolRef &elfSymRef, unsigned inputSectIdx); - - // Add a relocation to the output elf - void addRelocation(object::ELFRelocationRef relocRef, StringRef id, unsigned int relocSectionOffset, - unsigned int targetSectionOffset, unsigned sectType); - - // Get the output file offset of a particular input section in the output section - uint64_t getOutputOffset(unsigned inputIdx) { return m_offset + m_inputSections[inputIdx].offset; } - - // Get the overall alignment requirement, after calling layout(). - Align getAlignment() const { return m_alignment; } - - // Write the output section - void write(raw_pwrite_stream &outStream, ELF::Elf64_Shdr *shdr); - -private: - // Flag that we want to reduce alignment on the given input section, for gluing code together. 
- void setReduceAlign(const InputSection &inputSection) { - m_reduceAlign |= 1ULL << (&inputSection - &m_inputSections[0]); - } - - // See if the given input section has the reduce align flag set. - bool getReduceAlign(const InputSection &inputSection) const { - return (m_reduceAlign >> (&inputSection - &m_inputSections[0])) & 1; - } - - // Get alignment for an input section. This takes into account the reduceAlign flag. - Align getAlignment(const InputSection &inputSection); - - ElfLinkerImpl *m_linker; - StringRef m_name; // Section name - unsigned m_type; // Section type (SHT_* value) - uint64_t m_offset = 0; // File offset of this output section - SmallVector m_inputSections; // Input sections contributing to this output section - Align m_alignment; // Overall alignment required for the section - unsigned m_reduceAlign = 0; // Bitmap of input sections to reduce alignment for -}; - -// ===================================================================================================================== -// Internal implementation of the LGC interface for ELF linking. -class ElfLinkerImpl final : public ElfLinker { -public: - // Constructor given PipelineState and ELFs to link - ElfLinkerImpl(PipelineState *pipelineState, ArrayRef elfs); - - // Destructor - ~ElfLinkerImpl() override final; - - // ----------------------------------------------------------------------------------------------------------------- - // Implementations of ElfLinker methods exposed to the front-end - - // Add another input ELF to the link, in addition to the ones that were added when the ElfLinker was constructed. - // The default behavior of adding extra ones at the start of the list instead of the end is just so you - // get the same order of code (VS then FS) when doing a part-pipeline compile as when doing a whole pipeline - // compile, to make it easier to test by diff. 
- void addInputElf(MemoryBufferRef inputElf) override final { addInputElf(inputElf, /*addAtStart=*/true); } - void addInputElf(MemoryBufferRef inputElf, bool addAtStart); - - // Check whether we have FS input mappings, and thus whether we're doing part-pipeline compilation of the - // pre-FS part of the pipeline. - bool haveFsInputMappings() override final; - - // Get a representation of the fragment shader input mappings from the PAL metadata of ELF input(s) added so far. - // This is used by the caller in a part-pipeline compilation scheme to include the FS input mappings in the - // hash for the non-FS part of the pipeline. - StringRef getFsInputMappings() override final; - - // Get information on the glue code that will be needed for the link - llvm::ArrayRef getGlueInfo() override final; - - // Explicitly build color export shader - StringRef createColorExportShader(ArrayRef exports, bool enableKill) override final; - - // Add a blob for a particular chunk of glue code, typically retrieved from a cache - void addGlue(unsigned glueIndex, StringRef blob) override final; - - // Compile a particular chunk of glue code and retrieve its blob - StringRef compileGlue(unsigned glueIndex) override final; - - // Link the unlinked shader/part-pipeline ELFs and the compiled glue code into a pipeline ELF - bool link(raw_pwrite_stream &outStream) override final; - - // ----------------------------------------------------------------------------------------------------------------- - // Accessors - - PipelineState *getPipelineState() const { return m_pipelineState; } - ArrayRef getOutputSections() { return m_outputSections; } - StringRef getStrings() { return m_strings; } - SmallVectorImpl &getSymbols() { return m_symbols; } - SmallVectorImpl &getRelocations() { return m_relocations; } - SmallVectorImpl &getRelocationsA() { return m_relocationsA; } - void setStringTableIndex(unsigned index) { m_ehdr.e_shstrndx = index; } - StringRef getNotes() { return m_notes; } - - // Get 
string index in output ELF, adding to string table if necessary - unsigned getStringIndex(StringRef string); - - // Get string index in output ELF. Returns 0 if not found. - unsigned findStringIndex(StringRef string); - - // Find symbol in output ELF. Returns 0 if not found. - unsigned findSymbol(unsigned nameIndex); - unsigned findSymbol(StringRef name); - -private: - // Processing when all inputs are done. - void doneInputs(); - - // Find where an input section contributes to an output section - std::pair findInputSection(ElfInput &elfInput, object::SectionRef section); - - // Read PAL metadata from an ELF file and merge it in to the PAL metadata that we already have - void mergePalMetadataFromElf(object::ObjectFile &objectFile, bool isGlueCode); - - // Read ISA name string from an ELF file if not already done - void readIsaName(object::ObjectFile &objectFile); - - // Write ISA name into the .note section. - void writeIsaName(Align align); - - // Write the PAL metadata out into the .note section. - void writePalMetadata(Align align); - - // Create a GlueShader object for each glue shader needed for this link. - void createGlueShaders(); +namespace lgc { - // Insert glue shaders (if any). - bool insertGlueShaders(); - - // Returns true of the given elf contains just 1 shader. 
- bool containsASingleShader(ElfInput &elf); - - PipelineState *m_pipelineState; // PipelineState object - SmallVector m_elfInputs; // ELF objects to link - SmallVector, 4> m_glueShaders; // Glue shaders needed for link - SmallVector m_glueStrings; // Strings to return for glue shader cache keys - ELF::Elf64_Ehdr m_ehdr; // Output ELF header, copied from first input - SmallVector m_outputSections; // Output sections - SmallVector m_symbols; // Symbol table - SmallVector m_relocations; // Relocations - SmallVector m_relocationsA; // Relocations with explicit addend - StringMap m_symbolMap; // Map from name to symbol index - std::string m_strings; // Strings for string table - StringMap m_stringMap; // Map from string to string table index - std::string m_notes; // Notes to go in .note section - bool m_doneInputs = false; // Set when caller has done adding inputs - StringRef m_isaName; // ISA name to include in the .note section -}; - -} // anonymous namespace +using namespace Util; -namespace lgc { // ===================================================================================================================== // Create ELF linker given PipelineState and ELFs to link ElfLinker *createElfLinkerImpl(PipelineState *pipelineState, ArrayRef elfs) { return new ElfLinkerImpl(pipelineState, elfs); } -} // namespace lgc - // ===================================================================================================================== // Constructor given PipelineState and ELFs to link // @@ -280,7 +88,7 @@ void ElfLinkerImpl::addInputElf(MemoryBufferRef inputElf, bool addAtStart) { // Add the ELF. readIsaName(*elfInput.objectFile); mergePalMetadataFromElf(*elfInput.objectFile, false); - m_elfInputs.insert(addAtStart ? m_elfInputs.begin() : m_elfInputs.end(), std::move(elfInput)); + m_currentElfInput = m_elfInputs.insert(addAtStart ? 
m_elfInputs.begin() : m_elfInputs.end(), std::move(elfInput)); } // ===================================================================================================================== @@ -851,17 +659,20 @@ bool ElfLinkerImpl::containsASingleShader(ElfInput &elf) { // Add an input section to this output section // // @param elfInput : ELF input that the section comes from -// @param inputSection : Input section to add to this output section +// @param inputSectionRef : Input section to add to this output section // @param reduceAlign : Reduce the alignment of the section (for gluing code together) -void OutputSection::addInputSection(ElfInput &elfInput, object::SectionRef inputSection, bool reduceAlign) { +void OutputSection::addInputSection(ElfInput &elfInput, object::SectionRef inputSectionRef, bool reduceAlign) { // Add the input section. + InputSection inputSection(inputSectionRef); + m_inputSections.push_back(inputSection); + // Remember reduceAlign request. if (reduceAlign) setReduceAlign(m_inputSections.back()); // Add an entry to the ElfInput's sectionMap, so we can get from an input section to where it contributes // to an output section. 
- unsigned idx = inputSection.getIndex(); + unsigned idx = inputSectionRef.getIndex(); if (idx >= elfInput.sectionMap.size()) elfInput.sectionMap.resize(idx + 1, {UINT_MAX, UINT_MAX}); elfInput.sectionMap[idx] = {getIndex(), m_inputSections.size() - 1}; @@ -1081,8 +892,9 @@ void OutputSection::write(raw_pwrite_stream &outStream, ELF::Elf64_Shdr *shdr) { // Write the input section StringRef contents = cantFail(inputSection.sectionRef.getContents()); - outStream << contents.slice(0, inputSection.size); - size += inputSection.size; + uint64_t contentSize = inputSection.size; + outStream << contents.slice(0, contentSize); + size += contentSize; } if (endPadding) { @@ -1104,3 +916,5 @@ void OutputSection::write(raw_pwrite_stream &outStream, ELF::Elf64_Shdr *shdr) { shdr->sh_size = size; shdr->sh_addralign = m_alignment.value(); } + +} // namespace lgc diff --git a/lgc/elfLinker/ElfLinkerImpl.h b/lgc/elfLinker/ElfLinkerImpl.h new file mode 100644 index 0000000000..5d999986d9 --- /dev/null +++ b/lgc/elfLinker/ElfLinkerImpl.h @@ -0,0 +1,234 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ +/** + *********************************************************************************************************************** + * @file ElfLinkerImpl.h + * @brief LLPC header file: The class implements linking unlinked shader/part-pipeline ELFs into pipeline ELF + *********************************************************************************************************************** + */ +#pragma once + +#include "GlueShader.h" +#include "lgc/ElfLinker.h" +#include "llvm/BinaryFormat/MsgPackDocument.h" +#include "llvm/Object/ELFObjectFile.h" +#include + +struct ShaderDbgOutput; + +namespace lgc { + +class PipelineState; +class ElfLinkerImpl; + +// ===================================================================================================================== +// An ELF input to the linker +struct ElfInput { + std::unique_ptr objectFile; + llvm::SmallVector, 4> sectionMap; + llvm::StringRef reduceAlign; // If non-empty, the name of a text section to reduce the alignment to 0x40 +}; + +// ===================================================================================================================== +// A single input section +struct InputSection { + InputSection(llvm::object::SectionRef sectionRef) : sectionRef(sectionRef), size(sectionRef.getSize()) {} + llvm::object::SectionRef sectionRef; // Section from the input ELF + size_t offset = 0; // 
Offset within the output ELF section + uint64_t size; // Size, possibly after removing s_end_code padding +}; + +// ===================================================================================================================== +// A single output section +class OutputSection { +public: + // Constructor given name and optional SHT_* section type + OutputSection(ElfLinkerImpl *linker, llvm::StringRef name = "", unsigned type = 0) + : m_linker(linker), m_name(name), m_type(type) {} + + // Add an input section + void addInputSection(ElfInput &elfInput, llvm::object::SectionRef inputSectionRef, bool reduceAlign = false); + + // Get name of output section + llvm::StringRef getName(); + + // Get the section index in the output file + unsigned getIndex(); + + // Set the layout of this output section, allowing for alignment required by input sections. + void layout(); + + // Add a symbol to the output symbol table + void addSymbol(const llvm::object::ELFSymbolRef &elfSymRef, unsigned inputSectIdx); + + // Add a relocation to the output elf + void addRelocation(llvm::object::ELFRelocationRef relocRef, llvm::StringRef id, unsigned int relocSectionOffset, + unsigned int targetSectionOffset, unsigned sectType); + + // Get the output file offset of a particular input section in the output section + uint64_t getOutputOffset(unsigned inputIdx) { return m_offset + m_inputSections[inputIdx].offset; } + + // Get the overall alignment requirement, after calling layout(). + llvm::Align getAlignment() const { return m_alignment; } + + // Write the output section + void write(llvm::raw_pwrite_stream &outStream, llvm::ELF::Elf64_Shdr *shdr); + +private: + // Flag that we want to reduce alignment on the given input section, for gluing code together. + void setReduceAlign(const InputSection &inputSection) { + m_reduceAlign |= 1ULL << (&inputSection - &m_inputSections[0]); + } + + // See if the given input section has the reduce align flag set. 
+ bool getReduceAlign(const InputSection &inputSection) const { + return (m_reduceAlign >> (&inputSection - &m_inputSections[0])) & 1; + } + + // Get alignment for an input section. This takes into account the reduceAlign flag. + llvm::Align getAlignment(const InputSection &inputSection); + + ElfLinkerImpl *m_linker; + llvm::StringRef m_name; // Section name + unsigned m_type; // Section type (SHT_* value) + uint64_t m_offset = 0; // File offset of this output section + llvm::SmallVector m_inputSections; // Input sections contributing to this output section + llvm::Align m_alignment; // Overall alignment required for the section + unsigned m_reduceAlign = 0; // Bitmap of input sections to reduce alignment for +}; + +// ===================================================================================================================== +// Internal implementation of the LGC interface for ELF linking. +class ElfLinkerImpl final : public lgc::ElfLinker { +public: + // Constructor given PipelineState and ELFs to link + ElfLinkerImpl(lgc::PipelineState *pipelineState, llvm::ArrayRef elfs); + + // Destructor + ~ElfLinkerImpl() override final; + + // ----------------------------------------------------------------------------------------------------------------- + // Implementations of ElfLinker methods exposed to the front-end + + // Add another input ELF to the link, in addition to the ones that were added when the ElfLinker was constructed. + // The default behavior of adding extra ones at the start of the list instead of the end is just so you + // get the same order of code (VS then FS) when doing a part-pipeline compile as when doing a whole pipeline + // compile, to make it easier to test by diff. 
+ void addInputElf(llvm::MemoryBufferRef inputElf) override final { addInputElf(inputElf, /*addAtStart=*/true); } + void addInputElf(llvm::MemoryBufferRef inputElf, bool addAtStart); + + // Check whether we have FS input mappings, and thus whether we're doing part-pipeline compilation of the + // pre-FS part of the pipeline. + bool haveFsInputMappings() override final; + + // Get a representation of the fragment shader input mappings from the PAL metadata of ELF input(s) added so far. + // This is used by the caller in a part-pipeline compilation scheme to include the FS input mappings in the + // hash for the non-FS part of the pipeline. + llvm::StringRef getFsInputMappings() override final; + + // Get information on the glue code that will be needed for the link + llvm::ArrayRef getGlueInfo() override final; + + // Explicitly build color export shader + llvm::StringRef createColorExportShader(llvm::ArrayRef exports, bool enableKill) override final; + + // Add a blob for a particular chunk of glue code, typically retrieved from a cache + void addGlue(unsigned glueIndex, llvm::StringRef blob) override final; + + // Compile a particular chunk of glue code and retrieve its blob + llvm::StringRef compileGlue(unsigned glueIndex) override final; + + // Link the unlinked shader/part-pipeline ELFs and the compiled glue code into a pipeline ELF + bool link(llvm::raw_pwrite_stream &outStream) override final; + + // ----------------------------------------------------------------------------------------------------------------- + // Accessors + + lgc::PipelineState *getPipelineState() const { return m_pipelineState; } + llvm::ArrayRef getOutputSections() { return m_outputSections; } + llvm::StringRef getStrings() { return m_strings; } + llvm::SmallVectorImpl &getSymbols() { return m_symbols; } + llvm::SmallVectorImpl &getRelocations() { return m_relocations; } + llvm::SmallVectorImpl &getRelocationsA() { return m_relocationsA; } + void setStringTableIndex(unsigned index) { 
m_ehdr.e_shstrndx = index; } + llvm::StringRef getNotes() { return m_notes; } + + // Get string index in output ELF, adding to string table if necessary + unsigned getStringIndex(llvm::StringRef string); + + // Get string index in output ELF. Returns 0 if not found. + unsigned findStringIndex(llvm::StringRef string); + + // Find symbol in output ELF. Returns 0 if not found. + unsigned findSymbol(unsigned nameIndex); + unsigned findSymbol(llvm::StringRef name); + +private: + // Processing when all inputs are done. + void doneInputs(); + + // Find where an input section contributes to an output section + std::pair findInputSection(ElfInput &elfInput, llvm::object::SectionRef section); + + // Read PAL metadata from an ELF file and merge it in to the PAL metadata that we already have + void mergePalMetadataFromElf(llvm::object::ObjectFile &objectFile, bool isGlueCode); + + // Read ISA name string from an ELF file if not already done + void readIsaName(llvm::object::ObjectFile &objectFile); + + // Write ISA name into the .note section. + void writeIsaName(llvm::Align align); + + // Write the PAL metadata out into the .note section. + void writePalMetadata(llvm::Align align); + + // Create a GlueShader object for each glue shader needed for this link. + void createGlueShaders(); + + // Insert glue shaders (if any). + bool insertGlueShaders(); + + // Returns true of the given elf contains just 1 shader. 
+ bool containsASingleShader(ElfInput &elf); + + lgc::PipelineState *m_pipelineState; // PipelineState object + llvm::SmallVector m_elfInputs; // ELF objects to link + ElfInput *m_currentElfInput = nullptr; // Currently inserted ELF object + llvm::SmallVector, 4> m_glueShaders; // Glue shaders needed for link + llvm::SmallVector m_glueStrings; // Strings to return for glue shader cache keys + llvm::ELF::Elf64_Ehdr m_ehdr; // Output ELF header, copied from first input + llvm::SmallVector m_outputSections; // Output sections + llvm::SmallVector m_symbols; // Symbol table + llvm::SmallVector m_relocations; // Relocations + llvm::SmallVector m_relocationsA; // Relocations with explicit addend + llvm::StringMap m_symbolMap; // Map from name to symbol index + std::string m_strings; // Strings for string table + llvm::StringMap m_stringMap; // Map from string to string table index + std::string m_notes; // Notes to go in .note section + bool m_doneInputs = false; // Set when caller has done adding inputs + llvm::StringRef m_isaName; // ISA name to include in the .note section +}; + +} // namespace lgc diff --git a/lgc/elfLinker/ElfLinkerShaderDbg.cpp b/lgc/elfLinker/ElfLinkerShaderDbg.cpp new file mode 100644 index 0000000000..f11c6264c3 --- /dev/null +++ b/lgc/elfLinker/ElfLinkerShaderDbg.cpp @@ -0,0 +1,30 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ +/** + *********************************************************************************************************************** + * @file ElfLinkerShaderDbg.cpp + * @brief LLPC source file: ShaderDbg support for ElfLinkerImpl class + *********************************************************************************************************************** + */ diff --git a/lgc/include/lgc/builder/BuilderImpl.h b/lgc/include/lgc/builder/BuilderImpl.h index aefb6c4eeb..59d2f7911b 100644 --- a/lgc/include/lgc/builder/BuilderImpl.h +++ b/lgc/include/lgc/builder/BuilderImpl.h @@ -423,9 +423,6 @@ class BuilderImpl : public BuilderDefs { // For a cubearray with integer coordinates, combine the face and slice into a single component. 
void combineCubeArrayFaceAndSlice(llvm::Value *coord, llvm::SmallVectorImpl &coords); - // Patch descriptor with cube dimension for image call - llvm::Value *patchCubeDescriptor(llvm::Value *desc, unsigned dim); - // Handle cases where we need to add the FragCoord x,y to the coordinate, and use ViewIndex as the z coordinate. llvm::Value *handleFragCoordViewIndex(llvm::Value *coord, unsigned flags, unsigned &dim); @@ -521,6 +518,10 @@ class BuilderImpl : public BuilderDefs { // Mark fragment output type void markFsOutputType(llvm::Type *outputTy, unsigned location, InOutInfo outputInfo); + // Try to fold constant location offset if possible. + bool foldConstantLocationOffset(llvm::Type *inOutTy, unsigned &location, llvm::Value *&locationOffset, + llvm::Value *elemIdx, unsigned &locationCount, InOutInfo &inOutInfo); + std::tuple getInterpModeAndValue(InOutInfo inputInfo, llvm::Value *auxInterpValue); llvm::Value *evalIjOffsetSmooth(llvm::Value *offset); llvm::Value *adjustIj(llvm::Value *value, llvm::Value *offset); diff --git a/lgc/include/lgc/builder/SubgroupBuilder.h b/lgc/include/lgc/builder/SubgroupBuilder.h new file mode 100644 index 0000000000..89b4cbf163 --- /dev/null +++ b/lgc/include/lgc/builder/SubgroupBuilder.h @@ -0,0 +1,64 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2019-2024 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ +/** + *********************************************************************************************************************** + * @file SubgroupBuilder.h + * @brief LLPC header file: declaration of lgc::SubgroupBuilder implementation classes + *********************************************************************************************************************** + */ +#pragma once + +#include "lgc/builder/BuilderImpl.h" + +namespace lgc { + +// ===================================================================================================================== +// SubgroupBuilder class +// +// This class is meant to only be used by the LowerSubgroupOps pass. Using it from other passes could result in bugs +// when the wave size gets changed during a later stage. 
+class SubgroupBuilder : public BuilderImpl { +public: + SubgroupBuilder(Pipeline *pipeline) : BuilderImpl(pipeline){}; + + // ===================================================================================================================== + // Create a subgroup elect. + // + // @param instName : Name to give instruction(s) + llvm::Value *CreateSubgroupElect(const llvm::Twine &instName = ""); + + // Create a subgroup any + // + // @param value : The value to compare + // @param instName : Name to give instruction(s) + llvm::Value *CreateSubgroupAny(llvm::Value *const value, const llvm::Twine &instName = ""); + +private: + SubgroupBuilder() = delete; + SubgroupBuilder(const SubgroupBuilder &) = delete; + SubgroupBuilder &operator=(const SubgroupBuilder &) = delete; +}; + +} // namespace lgc diff --git a/lgc/include/lgc/patch/FragColorExport.h b/lgc/include/lgc/patch/FragColorExport.h index 2975286399..c7e949f547 100644 --- a/lgc/include/lgc/patch/FragColorExport.h +++ b/lgc/include/lgc/patch/FragColorExport.h @@ -64,9 +64,8 @@ class FragColorExport { ColorExportState colorExportState; // Color export state unsigned channelWriteMask[MaxColorTargets]; // Write mask to specify destination channels unsigned expFmt[MaxColorTargets]; // Export format used for "export" instruction. - unsigned dualExpFmt[2]; // Dual source blend export format. valid if dual source blend is enabled. - unsigned waveSize; // The wave size for fragment. - bool enableFragColor; // Whether to broadcast frag color. Only for OGLP + unsigned waveSize; // The wave size for fragment. + bool enableFragColor; // Whether to broadcast frag color. 
Only for OGLP }; FragColorExport(LgcContext *context); @@ -89,10 +88,10 @@ class FragColorExport { FragColorExport(const FragColorExport &) = delete; FragColorExport &operator=(const FragColorExport &) = delete; void updateColorExportInfoWithBroadCastInfo(const Key &key, llvm::ArrayRef originExpinfo, - llvm::SmallVector &outExpinfo, unsigned *pCbShaderMask); + bool needMrt0a, llvm::SmallVector &outExpinfo, + unsigned *pCbShaderMask); llvm::Value *handleColorExportInstructions(llvm::Value *output, unsigned int hwColorExport, BuilderBase &builder, - ExportFormat expFmt, const bool signedness, unsigned channelWriteMask, - const bool isDualSourceBlend); + ExportFormat expFmt, const bool signedness, const bool isDualSourceBlend); llvm::Value *convertToHalf(llvm::Value *value, bool signedness, BuilderBase &builder) const; llvm::Value *convertToFloat(llvm::Value *value, bool signedness, BuilderBase &builder) const; llvm::Value *convertToInt(llvm::Value *value, bool signedness, BuilderBase &builder) const; @@ -102,7 +101,7 @@ class FragColorExport { // Colors to be exported for dual-source-blend llvm::SmallVector m_blendSources[2]; // Number of color channels for dual-source-blend - unsigned m_blendSourceChannels; + unsigned m_blendSourceChannels = 0; LgcContext *m_lgcContext; }; diff --git a/lgc/include/lgc/patch/LowerCooperativeMatrix.h b/lgc/include/lgc/patch/LowerCooperativeMatrix.h index c67c3103ed..915dc399f2 100644 --- a/lgc/include/lgc/patch/LowerCooperativeMatrix.h +++ b/lgc/include/lgc/patch/LowerCooperativeMatrix.h @@ -43,6 +43,10 @@ class CooperativeRowAccLoadOp; class CooperativeRowAccStoreOp; class CooperativeRowAccFinalizeModeOp; class CooperativeRowAccAccumulateModeOp; +class CooperativeRowAccSplatOp; +class CooperativeRowAccExpandOp; +class CooperativeRowAccSumAccumulateOp; +class CooperativeRowAccScalarOp; // ===================================================================================================================== // Pass to lower coopMatrix 
calls @@ -87,115 +91,106 @@ class LowerCooperativeMatrix : public Patch, public llvm::PassInfoMixinv8*coopMatrix_data as two 16bits elements packed. llvm::Value *convFlatVecToCoopMatrixVec(BuilderCommon &builder, llvm::Value *vecValue, - Builder::CooperativeMatrixElementType elemType, - Builder::CooperativeMatrixLayout layout); + CooperativeMatrixElementType elemType, CooperativeMatrixLayout layout); // Convert cooperativeMatrix vec data to vec data. llvm::Value *convCoopMatrixVecToFlatVec(BuilderCommon &builder, llvm::Value *matrixValue, - Builder::CooperativeMatrixElementType elemType, - Builder::CooperativeMatrixLayout layout); + CooperativeMatrixElementType elemType, CooperativeMatrixLayout layout); // Create cooperative matrix store operation void cooperativeMatrixStoreInternal(llvm::Value *dataPtr, llvm::Value *stride, bool colMajor, - Builder::CooperativeMatrixElementType elemType, - Builder::CooperativeMatrixLayout layout, unsigned memoryAccess, - llvm::Value *&vecVal, const llvm::Twine &instName, llvm::Instruction *insertPos); + CooperativeMatrixElementType elemType, CooperativeMatrixLayout layout, + unsigned memoryAccess, unsigned alignment, llvm::Value *&vecVal, + const llvm::Twine &instName, llvm::Instruction *insertPos); // Open-code cooperative matrix extract operation llvm::Value *cooperativeMatrixExtract(BuilderCommon &builder, llvm::Value *matrix, llvm::Value *index, - Builder::CooperativeMatrixElementType elemType, - Builder::CooperativeMatrixLayout layout); + CooperativeMatrixElementType elemType, CooperativeMatrixLayout layout); // Open-code cooperative matrix insert operation llvm::Value *cooperativeMatrixInsert(BuilderCommon &builder, llvm::Value *matrix, llvm::Value *value, - llvm::Value *index, Builder::CooperativeMatrixElementType elemType, - Builder::CooperativeMatrixLayout layout); + llvm::Value *index, CooperativeMatrixElementType elemType, + CooperativeMatrixLayout layout); // Open-code cooperative matrix fill operation - llvm::Value 
*cooperativeMatrixFill(BuilderCommon &builder, llvm::Value *value, - Builder::CooperativeMatrixElementType elemType, - Builder::CooperativeMatrixLayout layout); + llvm::Value *cooperativeMatrixFill(BuilderCommon &builder, llvm::Value *value, CooperativeMatrixElementType elemType, + CooperativeMatrixLayout layout); // Create cooperative matrix convert operation llvm::Value *cooperativeMatrixConvert(llvm::CastInst::CastOps castOp, llvm::Value *source, - Builder::CooperativeMatrixElementType srcElemType, - Builder::CooperativeMatrixElementType dstElemType, - Builder::CooperativeMatrixLayout srclayout, - Builder::CooperativeMatrixLayout dstlayout, const llvm::Twine &instName, + CooperativeMatrixElementType srcElemType, + CooperativeMatrixElementType dstElemType, CooperativeMatrixLayout srclayout, + CooperativeMatrixLayout dstlayout, const llvm::Twine &instName, llvm::Instruction *insertPos); // Create cooperative matrix convert operation without reshape operation llvm::Value *cooperativeMatrixConvertInternal(llvm::CastInst::CastOps castOp, llvm::Value *source, - Builder::CooperativeMatrixElementType srcElemType, - Builder::CooperativeMatrixElementType dstElemType, - const llvm::Twine &instName, llvm::Instruction *insertPos); + CooperativeMatrixElementType srcElemType, + CooperativeMatrixElementType dstElemType, const llvm::Twine &instName, + llvm::Instruction *insertPos); // Create cooperative matrix binary operation - llvm::Value *cooperativeMatrixBinaryOp(Builder::CooperativeMatrixArithOp coopMatArithOp, llvm::Value *lhs, - llvm::Value *rhs, Builder::CooperativeMatrixElementType elemType, - Builder::CooperativeMatrixLayout layout, const llvm::Twine &instName, - llvm::Instruction *insertPos); + llvm::Value *cooperativeMatrixBinaryOp(CooperativeMatrixArithOp coopMatArithOp, llvm::Value *lhs, llvm::Value *rhs, + CooperativeMatrixElementType elemType, CooperativeMatrixLayout layout, + const llvm::Twine &instName, llvm::Instruction *insertPos); // Create cooperative 
matrixTimeScalar operation - llvm::Value *coopMatrixTimesScalar(llvm::Value *matrix, llvm::Value *scalar, - Builder::CooperativeMatrixElementType elemType, - Builder::CooperativeMatrixLayout layout, const llvm::Twine &instName, + llvm::Value *coopMatrixTimesScalar(llvm::Value *matrix, llvm::Value *scalar, CooperativeMatrixElementType elemType, + CooperativeMatrixLayout layout, const llvm::Twine &instName, llvm::Instruction *insertPos); // Create cooperative matrix reshape operation for 16bit on gfx10 and gfx110 - llvm::Value *cooperativeMatrixReshape16BitElementGfx1011(llvm::Value *matrix, - Builder::CooperativeMatrixElementType elemType, - Builder::CooperativeMatrixLayout srcLayout, - Builder::CooperativeMatrixLayout dstLayout, - llvm::Value *threadId, const llvm::Twine &instName, - llvm::Instruction *insertPos); + llvm::Value *cooperativeMatrixReshape16BitElementGfx1011(llvm::Value *matrix, CooperativeMatrixElementType elemType, + CooperativeMatrixLayout srcLayout, + CooperativeMatrixLayout dstLayout, llvm::Value *threadId, + const llvm::Twine &instName, llvm::Instruction *insertPos); // Create cooperative matrix reshape operation for 8bit on gfx10 and gfx11 - llvm::Value *cooperativeMatrixReshapeBetween8bitAnd32bitElementGfx1011( - llvm::Value *matrix, Builder::CooperativeMatrixElementType srcElemType, - Builder::CooperativeMatrixLayout srcLayout, const llvm::Twine &instName, llvm::Instruction *insertPos); + llvm::Value *cooperativeMatrixReshapeBetween8bitAnd32bitElementGfx1011(llvm::Value *matrix, + CooperativeMatrixElementType srcElemType, + CooperativeMatrixLayout srcLayout, + const llvm::Twine &instName, + llvm::Instruction *insertPos); // Adjust the layout on accumulator for gfx10 - llvm::Value *cooperativeMatrixReshapeBetween16bitAnd32bitOnAccGfx10( - llvm::Value *source, Builder::CooperativeMatrixElementType srcElemType, - Builder::CooperativeMatrixElementType dstElemType, Builder::CooperativeMatrixLayout layout, - llvm::Value *isEvenGroup, const 
llvm::Twine &instName, llvm::Instruction *insertPos); + llvm::Value * + cooperativeMatrixReshapeBetween16bitAnd32bitOnAccGfx10(llvm::Value *source, CooperativeMatrixElementType srcElemType, + CooperativeMatrixElementType dstElemType, + CooperativeMatrixLayout layout, llvm::Value *isEvenGroup, + const llvm::Twine &instName, llvm::Instruction *insertPos); // Adjust the layout before reshape operation(eg:float16->float32) - llvm::Value *cooperativeMatrixReshapeBeforeConvert(llvm::Value *source, - Builder::CooperativeMatrixElementType srcElemType, - Builder::CooperativeMatrixElementType dstElemType, - Builder::CooperativeMatrixLayout srcLayout, - Builder::CooperativeMatrixLayout dstLayout, - const llvm::Twine &instName, llvm::Instruction *insertPos); + llvm::Value *cooperativeMatrixReshapeBeforeConvert(llvm::Value *source, CooperativeMatrixElementType srcElemType, + CooperativeMatrixElementType dstElemType, + CooperativeMatrixLayout srcLayout, + CooperativeMatrixLayout dstLayout, const llvm::Twine &instName, + llvm::Instruction *insertPos); // Adjust the layout before reshape operation(eg:float32->float16) - llvm::Value *cooperativeMatrixReshapeAfterConvert(llvm::Value *source, - Builder::CooperativeMatrixElementType srcElemType, - Builder::CooperativeMatrixElementType dstElemType, - Builder::CooperativeMatrixLayout srcLayout, - Builder::CooperativeMatrixLayout dstLayout, - const llvm::Twine &instName, llvm::Instruction *insertPos); + llvm::Value *cooperativeMatrixReshapeAfterConvert(llvm::Value *source, CooperativeMatrixElementType srcElemType, + CooperativeMatrixElementType dstElemType, + CooperativeMatrixLayout srcLayout, + CooperativeMatrixLayout dstLayout, const llvm::Twine &instName, + llvm::Instruction *insertPos); // Create cooperative matrix transpose operation - llvm::Value *cooperativeMatrixTranspose(llvm::Value *matrix, Builder::CooperativeMatrixElementType elemType, - Builder::CooperativeMatrixLayout srcLayout, const llvm::Twine &instName, + llvm::Value 
*cooperativeMatrixTranspose(llvm::Value *matrix, CooperativeMatrixElementType elemType, + CooperativeMatrixLayout srcLayout, const llvm::Twine &instName, llvm::Instruction *insertPos); llvm::Value *transposeCooperativeMatrixRecursively(llvm::Value *matrix, unsigned vecStride, unsigned laneStride, @@ -204,9 +199,9 @@ class LowerCooperativeMatrix : public Patch, public llvm::PassInfoMixin { public: @@ -75,6 +76,7 @@ class LowerGpuRt : public llvm::PassInfoMixin { void visitGetFlattenedGroupThreadId(lgc::GpurtGetFlattenedGroupThreadIdOp &inst); void visitFloatWithRoundMode(lgc::GpurtFloatWithRoundModeOp &inst); void visitGpurtDispatchThreadIdFlatOp(lgc::GpurtDispatchThreadIdFlatOp &inst); + void visitContinuationStackIsGlobalOp(lgc::GpurtContinuationStackIsGlobalOp &inst); llvm::Value *m_stack = nullptr; // Stack array to hold stack value llvm::Type *m_stackTy = nullptr; // Stack type PipelineState *m_pipelineState = nullptr; // Pipeline state diff --git a/lgc/include/lgc/patch/LowerSubgroupOps.h b/lgc/include/lgc/patch/LowerSubgroupOps.h index 3c257320c0..d6ca33c49c 100644 --- a/lgc/include/lgc/patch/LowerSubgroupOps.h +++ b/lgc/include/lgc/patch/LowerSubgroupOps.h @@ -31,7 +31,7 @@ #pragma once #include "compilerutils/TypeLowering.h" -#include "continuations/CpsStackLowering.h" +#include "llvmraytracing/CpsStackLowering.h" #include "lgc/LgcCpsDialect.h" #include "lgc/LgcDialect.h" #include "lgc/patch/Patch.h" @@ -44,7 +44,7 @@ namespace lgc { -class SubgroupLoweringBuilder; +class SubgroupBuilder; // ===================================================================================================================== // The lower subgroup ops pass @@ -61,7 +61,7 @@ class LowerSubgroupOps : public Patch, public llvm::PassInfoMixin attribValues); - void exportVertexAttribs(llvm::Instruction *insertPos); + void exportVertexAttribs(BuilderBase &builder); GfxIpVersion m_gfxIp; // Graphics IP version info PipelineSystemValues m_pipelineSysValues; // Cache of 
ShaderSystemValues objects, one per shader stage diff --git a/lgc/include/lgc/util/Internal.h b/lgc/include/lgc/util/Internal.h index 663160fe84..e44a1ab0b4 100644 --- a/lgc/include/lgc/util/Internal.h +++ b/lgc/include/lgc/util/Internal.h @@ -35,6 +35,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/IR/Attributes.h" +#include "llvm/IR/GlobalValue.h" namespace llvm { @@ -96,4 +97,8 @@ bool isDontCareValue(llvm::Value *value); // type in a return value struct, ensuring it gets into VGPRs. llvm::Type *getVgprTy(llvm::Type *ty); +// Helper function to create LLVM Function and update NewDbgInfoFormat flag +llvm::Function *createFunctionHelper(llvm::FunctionType *ty, llvm::GlobalValue::LinkageTypes linkage, + llvm::Module *module, const llvm::Twine &name = ""); + } // namespace lgc diff --git a/lgc/interface/lgc/Builder.h b/lgc/interface/lgc/Builder.h index e85e5589bb..cf5555bc30 100644 --- a/lgc/interface/lgc/Builder.h +++ b/lgc/interface/lgc/Builder.h @@ -113,23 +113,39 @@ class InOutInfo { m_data.bits.component = component; } + unsigned getNumComponents() const { return m_data.bits.numComponents; } + void setNumComponents(unsigned numComponents) { + assert(numComponents >= 1 && numComponents <= 8); // Valid range is 1~8 + m_data.bits.numComponents = numComponents; + } + private: union { struct { - unsigned interpMode : 4; // FS input: interpolation mode - unsigned interpLoc : 3; // FS input: interpolation location - unsigned hasInterpAux : 1; // FS input: there is an interpolation auxiliary value - unsigned streamId : 2; // GS output: vertex stream ID (0 if none) - unsigned hasStreamId : 1; // GS output: true if it has a stream ID - unsigned isSigned : 1; // FS output: is signed integer. Determines whether i16-component output - // is zero- or sign-extended - unsigned arraySize : 4; // Built-in array input: shader-defined array size. 
Must be set for - // a read or write of ClipDistance or CullDistance that is of the - // whole array or of an element with a variable index. - unsigned perPrimitive : 1; // Mesh shader output: whether it is a per-primitive output - unsigned component : 2; // Component offset, specifying which components within a location is consumed + unsigned interpMode : 4; // FS input: interpolation mode + unsigned interpLoc : 3; // FS input: interpolation location + unsigned hasInterpAux : 1; // FS input: there is an interpolation auxiliary value + unsigned streamId : 2; // GS output: vertex stream ID (0 if none) + unsigned hasStreamId : 1; // GS output: true if it has a stream ID + unsigned isSigned : 1; // FS output: is signed integer. Determines whether i16-component output + // is zero- or sign-extended + unsigned arraySize : 4; // Built-in array input: shader-defined array size. Must be set for + // a read or write of ClipDistance or CullDistance that is of the + // whole array or of an element with a variable index. + unsigned perPrimitive : 1; // Mesh shader output: whether it is a per-primitive output + unsigned component : 2; // Component offset, specifying which components within a location is consumed + unsigned numComponents : 4; // Number of components for vector/scalar inputs/outputs. For 64-bit data types, each + // vector element or scalar is considered to occupy two components. The valid range + // is therefore 1~8. This field is used to reserve enough [location, component] map + // items for locations of an input/output when component indexing is performed + // (currently allowed for TCS input/output, TES input, and mesh shader output). If + // not specified (0 by default), LGC will try to determine its value from the + // associated call that reads/writes an input/output. In dynamic component indexing + // case, this field must be specified by frontend when invoking such input/output + // reading/writing call. 
unsigned disableProvokingVertexMode : 1; // Disable the provoking vertex mode } bits; + unsigned u32All; } m_data; }; diff --git a/lgc/interface/lgc/BuilderCommon.h b/lgc/interface/lgc/BuilderCommon.h index e86d56db06..3f17329334 100644 --- a/lgc/interface/lgc/BuilderCommon.h +++ b/lgc/interface/lgc/BuilderCommon.h @@ -36,6 +36,10 @@ namespace lgc { enum class ResourceNodeType : unsigned; +enum class CooperativeMatrixMemoryAccess : unsigned; +enum class CooperativeMatrixElementType : unsigned; +enum class CooperativeMatrixLayout : unsigned; +enum class CooperativeMatrixArithOp : unsigned; // ===================================================================================================================== // BuilderCommon extends llvm_dialects::Builder, which extends llvm::IRBuilder<>, and provides a few utility methods @@ -84,55 +88,15 @@ class BuilderCommon : public llvm_dialects::Builder { llvm::CallInst *CreateNamedCall(llvm::StringRef funcName, llvm::Type *retTy, llvm::ArrayRef args, llvm::ArrayRef attribs, const llvm::Twine &instName = ""); + // ===================================================================================================================== + // Create alloca for given input type. + // + // @param ty : pointer type. + llvm::Value *CreateAllocaAtFuncEntry(llvm::Type *ty); + // ----------------------------------------------------------------------------------------------------------------- // Cooperative matrix operation. 
- enum CooperativeMatrixMemoryAccess { - MemoryAccessMaskNone = 0x00, // No mask - MemoryAccessVolatileMask = 0x01, // Access memory in volatile - MemoryAccessCoherentMask = 0x02, // Access memory in coherent - MemoryAccessTemporalMask = 0x04, // Access memory in temporal - }; - - enum CooperativeMatrixElementType { - Unknown = 0, // Unknown - Float16, // 16-bit floating-point - Float32, // 32-bit floating-point - Int8, // 8-bit integer - Int16, // 16-bit integer - Int32, // 32 bit integer - Float16Packed, // packed 16-bit floating-point - }; - - // Layout is virtual concept, eg: 16bit and 32bit for matrixC will share the same layout initially. - // It will be passed as the argument of getTypeProperties to calculate the more detailed layout information. - enum CooperativeMatrixLayout { - FactorMatrixLayout = 0, // A/B layout on gfx10/gfx11 - AccumulatorMatrixLayout, // C/D layout on gfx11 - Gfx10AccumulatorMatrixLayout, // 32bit@C/D layout on gfx10 - Gfx10Accumulator16bitMatrixLayout, // 16bit@C/D layout on gfx10 - InvalidLayout - }; - - // The cooperative matrix arithmetic operations the builder can consume. - // NOTE: We rely on casting this implicitly to an integer, so we cannot use an enum class. - enum class CooperativeMatrixArithOp { - IAdd = 0, - FAdd, - ISub, - FSub, - IMul, - FMul, - UDiv, - SDiv, - FDiv, - UMod, - SRem, - SMod, - FRem, - FMod - }; - // Convert the element type enum into the corresponding LLVM type. llvm::Type *transCooperativeMatrixElementType(CooperativeMatrixElementType elemType); @@ -165,10 +129,12 @@ class BuilderCommon : public llvm_dialects::Builder { // @param colMaj : Whether the values loaded from memory are arrayed in column-major or row-major. // @param layout : Identify it's factor or accumulator // @param memoryAccess : Parsed from Memory operands in SPIRV-reader + // @param alignment : Alignment for memory operation. 
// @param instName : Name to give instruction(s) llvm::Value *CreateCooperativeMatrixLoad(llvm::Value *pointer, llvm::Value *stride, bool colMajor, CooperativeMatrixElementType elemType, CooperativeMatrixLayout layout, - unsigned memoryAccess, const llvm::Twine &instName = ""); + unsigned memoryAccess, llvm::Align alignment, + const llvm::Twine &instName = ""); // Create cooperative matrix store. // @@ -179,11 +145,12 @@ class BuilderCommon : public llvm_dialects::Builder { // @param colMaj : Whether the values loaded from memory are arrayed in column-major or row-major. // @param layout : Identify it's factor or accumulator // @param memoryAccess : Parsed from Memory operands in SPIRV-reader + // @param alignment : Alignment for memory operation. // @param instName : Name to give instruction(s). llvm::Value *CreateCooperativeMatrixStore(llvm::Value *pointer, llvm::Value *matrix, llvm::Value *stride, bool colMajor, CooperativeMatrixElementType elemType, CooperativeMatrixLayout layout, unsigned memoryAccess, - const llvm::Twine &instName = ""); + llvm::Align alignment, const llvm::Twine &instName = ""); // Create cooperative matrix conversion. // @param opCode : The convert opCode. 
diff --git a/lgc/interface/lgc/BuiltInDefs.h b/lgc/interface/lgc/BuiltInDefs.h index 771439c1bf..73e38a7fdc 100644 --- a/lgc/interface/lgc/BuiltInDefs.h +++ b/lgc/interface/lgc/BuiltInDefs.h @@ -124,7 +124,7 @@ BUILTIN(InterpLinearCenter, BuiltInInternalBase + 5, N, P, v2f32) BUILTIN(InterpLinearCentroid, BuiltInInternalBase + 6, N, P, v2f32) // Internal built-ins for sample position emulation -BUILTIN(SamplePosOffset, BuiltInInternalBase + 7, N, P, i32) +BUILTIN(SamplePosOffset, BuiltInInternalBase + 7, N, P, v2f32) BUILTIN(NumSamples, BuiltInInternalBase + 8, N, P, i32) BUILTIN(SamplePatternIdx, BuiltInInternalBase + 9, N, P, i32) BUILTIN(GsWaveId, BuiltInInternalBase + 10, N, G, i32) diff --git a/lgc/interface/lgc/LgcDialect.h b/lgc/interface/lgc/LgcDialect.h index 74e34df252..9dc36f4115 100644 --- a/lgc/interface/lgc/LgcDialect.h +++ b/lgc/interface/lgc/LgcDialect.h @@ -30,6 +30,56 @@ */ #pragma once +namespace lgc { + +enum class CooperativeMatrixMemoryAccess : unsigned { + MemoryAccessMaskNone = 0x00, // No mask + MemoryAccessVolatileMask = 0x01, // Access memory in volatile + MemoryAccessCoherentMask = 0x02, // Access memory in coherent + MemoryAccessTemporalMask = 0x04, // Access memory in temporal +}; + +enum class CooperativeMatrixElementType : unsigned { + Unknown = 0, // Unknown + Float16, // 16-bit floating-point + Float32, // 32-bit floating-point + Int8, // 8-bit integer + Int16, // 16-bit integer + Int32, // 32 bit integer + Float16Packed, // packed 16-bit floating-point +}; + +// Layout is virtual concept, eg: 16bit and 32bit for matrixC will share the same layout initially. +// It will be passed as the argument of getTypeProperties to calculate the more detailed layout information. 
+enum class CooperativeMatrixLayout : unsigned { + FactorMatrixLayout = 0, // A/B layout on gfx10/gfx11 + AccumulatorMatrixLayout, // C/D layout on gfx11 + Gfx10AccumulatorMatrixLayout, // 32bit@C/D layout on gfx10 + Gfx10Accumulator16bitMatrixLayout, // 16bit@C/D layout on gfx10 + InvalidLayout +}; + +// The cooperative matrix arithmetic operations the builder can consume. +// NOTE: This is a scoped enum (enum class), so converting a value to an integer requires an explicit cast. +enum class CooperativeMatrixArithOp : unsigned { + IAdd = 0, + FAdd, + ISub, + FSub, + IMul, + FMul, + UDiv, + SDiv, + FDiv, + UMod, + SRem, + SMod, + FRem, + FMod +}; + +} // namespace lgc + #define GET_INCLUDES #define GET_DIALECT_DECLS #include "lgc/LgcDialect.h.inc" diff --git a/lgc/interface/lgc/LgcDialect.td b/lgc/interface/lgc/LgcDialect.td index 1f5759f6cf..ea971d1527 100644 --- a/lgc/interface/lgc/LgcDialect.td +++ b/lgc/interface/lgc/LgcDialect.td @@ -38,6 +38,11 @@ def TaskPayloadPointer : TgConstant<(PointerType 7)>, Type; def V4I32 : TgConstant<(FixedVectorType I32, 4)>, Type; +defm CooperativeMatrixMemoryAccess : AttrEnum<"CooperativeMatrixMemoryAccess">; +defm CooperativeMatrixElementType : AttrEnum<"CooperativeMatrixElementType">; +defm CooperativeMatrixLayout : AttrEnum<"CooperativeMatrixLayout">; +defm CooperativeMatrixArithOp : AttrEnum<"CooperativeMatrixArithOp">; + class LgcOp traits_ = []> : Op; @@ -415,8 +420,8 @@ def SubgroupAnyOp : LgcOp<"subgroup.any", [NoUnwind, Convergent]> { }]; } -def CooperativeRowAccLoadOp : LgcOp<"cooperative.rowacc.load", [Memory<[]>, WillReturn]> { - let arguments = (ins value:$pointer, I32:$stride, AttrI32:$elemType, AttrI32:$memoryAccess); +def CooperativeRowAccLoadOp : LgcOp<"cooperative.rowacc.load", [Memory<[(read)]>, WillReturn]> { + let arguments = (ins value:$pointer, I32:$stride, CooperativeMatrixElementType:$elem_type, CooperativeMatrixMemoryAccess:$memory_access); let results = (outs value:$result); let defaultBuilderHasExplicitResultType =
true; @@ -427,34 +432,34 @@ def CooperativeRowAccLoadOp : LgcOp<"cooperative.rowacc.load", [Memory<[]>, Will Return acc row data in finalized mode. + 'pointer' is the pointer address to the data. 'stride' is the stride in bytes in memory between the first elements in the source data. - 'elemType' is the element type for the row acc. - 'memoryAccess' is the memory operands which provide:isVolatile/isTemporal/isCoherent additional operands, + 'elem_type' is the element type for the row acc. + 'memory_access' is the memory operands which provide:isVolatile/isTemporal/isCoherent additional operands, maybe volatile/Aligned/Nontemporal/MakePointerAvailable }]; } -def CooperativeRowAccStoreOp : LgcOp<"cooperative.rowacc.store", [Memory<[]>]> { - let arguments = (ins value:$pointer, I32:$stride, AttrI32:$elemType, value:$value, AttrI32:$memoryAccess); +def CooperativeRowAccStoreOp : LgcOp<"cooperative.rowacc.store", [Memory<[(write)]>]> { + let arguments = (ins value:$pointer, I32:$stride, CooperativeMatrixElementType:$elem_type, value:$data, CooperativeMatrixMemoryAccess:$memory_access); let results = (outs); let summary = "store cooperative rowacc to memory"; let description = [{ Store a contiguous elements from the specified location of the memory. + 'pointer' is the pointer address to the data. 'stride' is the stride in bytes in memory between the first elements in the source data. - 'elemType' is the element type for the row acc. - 'value' is data of row acc, Must be in finalized mode. - 'memoryAccess' is the memory operands which provide:isVolatile/isTemporal/isCoherent additional operands, + 'elem_type' is the element type for the row acc. + 'data' is data of row acc, Must be in finalized mode. 
+ 'memory_access' is the memory operands which provide:isVolatile/isTemporal/isCoherent additional operands, maybe volatile/Aligned/Nontemporal/MakePointerAvailable }]; } -def CooperativeRowAccAccumulateModeOp : LgcOp<"cooperative.rowacc.accumulatemode", [Memory<[]>, WillReturn]> { - let arguments = (ins value:$rowAccValue, AttrI32:$elemType); - let results = (outs value:$result); - - let defaultBuilderHasExplicitResultType = true; +def CooperativeRowAccAccumulateModeOp : LgcOp<"cooperative.rowacc.accumulate.mode", [Memory<[]>, WillReturn]> { + let arguments = (ins value:$row_acc, CooperativeMatrixElementType:$elem_type); + let results = (outs (eq $row_acc):$result); let summary = "change cooperative row acc date mode from finalize mode to accumulate mode"; let description = [{ @@ -462,24 +467,90 @@ def CooperativeRowAccAccumulateModeOp : LgcOp<"cooperative.rowacc.accumulatemode Return acc row data in accumulate mode. - 'rowAccValue' is the input row acc data, must be in finalize mode. - 'elemType' is the element type for the row acc. + 'row_acc' is the input row acc data, must be in finalize mode. + 'elem_type' is the element type for the row acc. + }]; +} + +def CooperativeRowAccFinalizeModeOp : LgcOp<"cooperative.rowacc.finalize.mode", [Memory<[]>, WillReturn]> { + let arguments = (ins value:$row_acc, CooperativeMatrixElementType:$elem_type); + let results = (outs (eq $row_acc):$result); + + let summary = "change cooperative rowacc date mode from accumulate state to finalize state"; + let description = [{ + convert the row accumulator data from accumulate mode to finalize mode. + + Return row accumulator data in finalized mode. + + 'row_acc' is the input row acc data, must be in accumulate mode. + 'elem_type' is the element type for the row acc. 
}]; } -def CooperativeRowAccFinalizeModeOp : LgcOp<"cooperative.rowacc.finalizemode", [Memory<[]>, WillReturn]> { - let arguments = (ins value:$rowAccValue, AttrI32:$elemType); +def CooperativeRowAccSplatOp : LgcOp<"cooperative.rowacc.splat", [Memory<[]>, WillReturn]> { + let arguments = (ins value:$scalar, CooperativeMatrixElementType:$elem_type); let results = (outs value:$result); let defaultBuilderHasExplicitResultType = true; - let summary = "change cooperative rowacc date mode from accumulate state to finalize state"; + let summary = "fill cooperative rowacc with a scalar value"; let description = [{ - convert the row acc data from accumulate mode to finalize mode. + Return filled cooperative acc row in finalize mode. - Return acc row data in finalized mode. + 'scalar' is the scalar value used to fill the cooperative row acc. + 'elem_type' is the element type for the cooperative row acc. + }]; +} + +def CooperativeRowAccSumAccumulateOp : LgcOp<"cooperative.rowacc.sum.accumulate", [Memory<[]>, WillReturn]> { + let arguments = (ins value:$matrix, CooperativeMatrixElementType:$matrix_elem_type, CooperativeMatrixLayout:$matrix_layout, value:$row_acc, CooperativeMatrixElementType:$row_acc_elem_type, AttrI1:$is_signed); + let results = (outs value:$result); + + let defaultBuilderHasExplicitResultType = true; + + let summary = "sum and accumulate columns of cooperative matrix value to cooperative row accumulator"; + let description = [{ + Return accumulated acc row data in accumulate mode. + + 'matrix' is the input cooperative matrix. Must be A/B matrix. + 'matrix_elem_type' is the element type for the cooperative matrix. + 'matrix_layout' is the layout for the cooperative matrix. + 'row_acc' is the input cooperative row acc, must be in accumulate mode. + 'row_acc_elem_type' is the element type for input cooperative row acc. + 'is_signed' indicates whether the row accumulator element type should be treated as signed.
+ }]; +} + +def CooperativeRowAccScalarOp : LgcOp<"cooperative.rowacc.scalar", [Memory<[]>, WillReturn]> { + let arguments = (ins CooperativeMatrixArithOp:$binop, value:$row_acc, CooperativeMatrixElementType:$elem_type, value:$scalar, AttrI1:$accumulate_mode); + let results = (outs (eq $row_acc):$result); + + let summary = "cooperative row accumulator scalar operation in accumulate or finalize mode"; + let description = [{ + Return the cooperative row accumulator data with same mode (accumulate or finalized) as input cooperative accumulator data. + + 'binop' is the cooperative matrix arithmetic operation. + 'row_acc' is the input cooperative row accumulator. + 'elem_type' is the element type for the cooperative row accumulator data. + 'scalar' is the scalar value for operation. + 'accumulate_mode' indicates whether the input and returned accumulator data are in accumulate or finalize mode. + }]; +} + +def CooperativeRowAccExpandOp : LgcOp<"cooperative.rowacc.expand", [Memory<[]>, WillReturn]> { + let arguments = (ins value:$row_acc, CooperativeMatrixElementType:$row_acc_elem_type, CooperativeMatrixElementType:$matrix_elem_type, CooperativeMatrixLayout:$matrix_layout, AttrI1:$col_major); + let results = (outs value:$result); + + let defaultBuilderHasExplicitResultType = true; + + let summary = "expand cooperative row accumulator data to cooperative matrix, the input row accumulator data must be in finalize mode."; + let description = [{ + Return the cooperative matrix. - 'rowAccValue' is the input row acc data, must be in accumulate mode. - 'elemType' is the element type for the row acc. + 'row_acc' is the input cooperative row accumulator. + 'row_acc_elem_type' is the element type for the input cooperative row accumulator data. + 'matrix_elem_type' is the element type for the output cooperative matrix. + 'matrix_layout' is the layout for the output cooperative matrix. + 'col_major' indicates how to expand the cooperative row accumulator data, by row or by column.
}]; } diff --git a/lgc/interface/lgc/ModuleBunch.h b/lgc/interface/lgc/ModuleBunch.h index 742edca661..cec8ece044 100644 --- a/lgc/interface/lgc/ModuleBunch.h +++ b/lgc/interface/lgc/ModuleBunch.h @@ -144,7 +144,12 @@ class ModuleBunchToModulePassAdaptor : public PassInfoMixin std::unique_ptr createForModuleBunchToModulePassAdaptor(ModulePassT Pass) { +#if LLVM_MAIN_REVISION && LLVM_MAIN_REVISION < 488550 using PassModelT = detail::PassModel; +#else + // Analysis are always preserved. + using PassModelT = detail::PassModel; +#endif return std::unique_ptr(new PassModelT(std::forward(Pass))); } diff --git a/lgc/patch/CombineCooperativeMatrix.cpp b/lgc/patch/CombineCooperativeMatrix.cpp index 8dabd3aad4..658d70a2e7 100644 --- a/lgc/patch/CombineCooperativeMatrix.cpp +++ b/lgc/patch/CombineCooperativeMatrix.cpp @@ -34,6 +34,7 @@ */ #include "lgc/patch/CombineCooperativeMatrix.h" #include "lgc/Builder.h" +#include "lgc/LgcDialect.h" #include "lgc/state/Defs.h" #include "lgc/state/PipelineState.h" #include "lgc/state/TargetInfo.h" @@ -48,10 +49,10 @@ using namespace lgc; namespace { struct Shape { - Builder::CooperativeMatrixElementType elementType; - Builder::CooperativeMatrixLayout layout; + CooperativeMatrixElementType elementType; + CooperativeMatrixLayout layout; - Shape(Builder::CooperativeMatrixElementType elementType_, Builder::CooperativeMatrixLayout layout_) + Shape(CooperativeMatrixElementType elementType_, CooperativeMatrixLayout layout_) : elementType(elementType_), layout(layout_) {} bool operator==(const Shape &rhs) const { return elementType == rhs.elementType && layout == rhs.layout; } @@ -132,9 +133,9 @@ bool CooperativeMatrixCombiner::run() { } else if (m_gfxIpVersion.major == 11 && fn.getName().starts_with(lgcName::CooperativeMatrixMulAdd)) { for (User *user : fn.users()) { if (auto *call = dyn_cast(user)) { - Builder::CooperativeMatrixElementType accumElemType = static_cast( - cast(call->getOperand(7))->getZExtValue()); - bool isPackable = 
accumElemType == Builder::CooperativeMatrixElementType::Float16; + auto accumElemType = + static_cast(cast(call->getOperand(7))->getZExtValue()); + bool isPackable = accumElemType == CooperativeMatrixElementType::Float16; if (call->getFunction() == &m_function && isPackable) { muladds[call->getParent()].push_back(call); } @@ -188,7 +189,7 @@ bool CooperativeMatrixCombiner::run() { Shape CooperativeMatrixCombiner::getShapeOfTranspose(CallInst *transpose) { unsigned elemType = cast(transpose->getArgOperand(1))->getZExtValue(); unsigned layout = cast(transpose->getArgOperand(2))->getZExtValue(); - return {(Builder::CooperativeMatrixElementType)elemType, (Builder::CooperativeMatrixLayout)layout}; + return {(CooperativeMatrixElementType)elemType, (CooperativeMatrixLayout)layout}; } // ===================================================================================================================== @@ -236,10 +237,8 @@ bool CooperativeMatrixCombiner::tryFold(CallInst *op) { // transpose/convert(undef) -> undef, if legal bool isFoldable = true; if (isConvert) { - auto srcElementType = - (Builder::CooperativeMatrixElementType)cast(op->getArgOperand(2))->getZExtValue(); - auto dstElementType = - (Builder::CooperativeMatrixElementType)cast(op->getArgOperand(3))->getZExtValue(); + auto srcElementType = (CooperativeMatrixElementType)cast(op->getArgOperand(2))->getZExtValue(); + auto dstElementType = (CooperativeMatrixElementType)cast(op->getArgOperand(3))->getZExtValue(); if (srcElementType != dstElementType) { // This is slightly conservative, but the point here is that e.g. `zext undef(i16) to i32` can't be folded // to undef because the result can't truly take all possible bit patterns. @@ -350,7 +349,7 @@ bool CooperativeMatrixCombiner::tryFoldComponentContaining(Value *start) { } while (!worklistForward.empty()); // Step 2: Analyze the inputs and outputs. 
- std::optional otherLayout; + std::optional otherLayout; Type *otherType = nullptr; unsigned numUnhandledInputs = 0; unsigned numTransposeInputs = 0; @@ -366,7 +365,7 @@ bool CooperativeMatrixCombiner::tryFoldComponentContaining(Value *start) { assert(*component.shape == shape); }; - auto foundOtherLayout = [&](Builder::CooperativeMatrixLayout layout, Type *type) { + auto foundOtherLayout = [&](CooperativeMatrixLayout layout, Type *type) { if (!otherLayout) { otherLayout = layout; otherType = type; @@ -398,18 +397,16 @@ bool CooperativeMatrixCombiner::tryFoldComponentContaining(Value *start) { continue; } if (callee->getName().starts_with(lgcName::CooperativeMatrixConvert)) { - auto srcElemType = - (Builder::CooperativeMatrixElementType)cast(call->getArgOperand(2))->getZExtValue(); - auto dstElemType = - (Builder::CooperativeMatrixElementType)cast(call->getArgOperand(3))->getZExtValue(); + auto srcElemType = (CooperativeMatrixElementType)cast(call->getArgOperand(2))->getZExtValue(); + auto dstElemType = (CooperativeMatrixElementType)cast(call->getArgOperand(3))->getZExtValue(); if (srcElemType != dstElemType) { LLVM_DEBUG(dbgs() << " unhandled element type input conversion: " << *call << '\n'); ++numUnhandledInputs; continue; } - auto srcLayout = (Builder::CooperativeMatrixLayout)cast(call->getArgOperand(4))->getZExtValue(); - auto dstLayout = (Builder::CooperativeMatrixLayout)cast(call->getArgOperand(5))->getZExtValue(); + auto srcLayout = (CooperativeMatrixLayout)cast(call->getArgOperand(4))->getZExtValue(); + auto dstLayout = (CooperativeMatrixLayout)cast(call->getArgOperand(5))->getZExtValue(); foundComponentShape({dstElemType, dstLayout}); foundOtherLayout(srcLayout, call->getArgOperand(1)->getType()); @@ -435,18 +432,16 @@ bool CooperativeMatrixCombiner::tryFoldComponentContaining(Value *start) { continue; } if (callee->getName().starts_with(lgcName::CooperativeMatrixConvert)) { - auto srcElemType = - 
(Builder::CooperativeMatrixElementType)cast(call->getArgOperand(2))->getZExtValue(); - auto dstElemType = - (Builder::CooperativeMatrixElementType)cast(call->getArgOperand(3))->getZExtValue(); + auto srcElemType = (CooperativeMatrixElementType)cast(call->getArgOperand(2))->getZExtValue(); + auto dstElemType = (CooperativeMatrixElementType)cast(call->getArgOperand(3))->getZExtValue(); if (srcElemType != dstElemType) { LLVM_DEBUG(dbgs() << " unhandled element type output conversion: " << *call << '\n'); ++numUnhandledInputs; continue; } - auto srcLayout = (Builder::CooperativeMatrixLayout)cast(call->getArgOperand(4))->getZExtValue(); - auto dstLayout = (Builder::CooperativeMatrixLayout)cast(call->getArgOperand(5))->getZExtValue(); + auto srcLayout = (CooperativeMatrixLayout)cast(call->getArgOperand(4))->getZExtValue(); + auto dstLayout = (CooperativeMatrixLayout)cast(call->getArgOperand(5))->getZExtValue(); foundComponentShape({srcElemType, srcLayout}); foundOtherLayout(dstLayout, call->getType()); @@ -592,8 +587,7 @@ bool CooperativeMatrixCombiner::tryFoldComponentContaining(Value *start) { unsigned dstElemType = cast(call->getArgOperand(3))->getZExtValue(); if (srcElemType == dstElemType) { - unsigned srcLayout = - (Builder::CooperativeMatrixLayout)cast(call->getArgOperand(4))->getZExtValue(); + auto srcLayout = (CooperativeMatrixLayout)cast(call->getArgOperand(4))->getZExtValue(); assert(srcLayout == *otherLayout); (void(srcLayout)); // unused @@ -640,8 +634,7 @@ bool CooperativeMatrixCombiner::tryFoldComponentContaining(Value *start) { unsigned dstElemType = cast(call->getArgOperand(3))->getZExtValue(); if (srcElemType == dstElemType) { - unsigned dstLayout = - (Builder::CooperativeMatrixLayout)cast(call->getArgOperand(5))->getZExtValue(); + auto dstLayout = (CooperativeMatrixLayout)cast(call->getArgOperand(5))->getZExtValue(); assert(dstLayout == *otherLayout); (void(dstLayout)); // unused @@ -918,8 +911,8 @@ Value 
*CooperativeMatrixCombiner::tryFoldTimesScalar(CallInst *timesScalarLo, Ca scalarVec = b.CreateInsertElement(scalarVec, timesScalarLo->getArgOperand(1), b.getInt32(0)); scalarVec = b.CreateInsertElement(scalarVec, timesScalarHi->getArgOperand(1), b.getInt32(1)); auto *timesScalarPacked = - b.CreateCoopMatrixTimesScalar(packedMatrix, scalarVec, Builder::CooperativeMatrixElementType::Float16Packed, - Builder::CooperativeMatrixLayout::AccumulatorMatrixLayout); + b.CreateCoopMatrixTimesScalar(packedMatrix, scalarVec, CooperativeMatrixElementType::Float16Packed, + CooperativeMatrixLayout::AccumulatorMatrixLayout); m_eraseList.push_back(timesScalarLo); m_eraseList.push_back(timesScalarHi); return timesScalarPacked; diff --git a/lgc/patch/FragColorExport.cpp b/lgc/patch/FragColorExport.cpp index 643e8968a3..4dedd2320c 100644 --- a/lgc/patch/FragColorExport.cpp +++ b/lgc/patch/FragColorExport.cpp @@ -93,11 +93,10 @@ static void extractElements(Value *input, BuilderBase &builder, std::arraygetType(); @@ -120,8 +119,9 @@ Value *FragColorExport::handleColorExportInstructions(Value *output, unsigned hw const auto undefFloat = PoisonValue::get(builder.getFloatTy()); const auto undefFloat16x2 = PoisonValue::get(FixedVectorType::get(builder.getHalfTy(), 2)); + const auto undefHalf = PoisonValue::get(halfTy); - std::array comps; + std::array comps{}; std::array exports{undefFloat, undefFloat, undefFloat, undefFloat}; unsigned exportMask = 0; @@ -141,36 +141,24 @@ Value *FragColorExport::handleColorExportInstructions(Value *output, unsigned hw case EXP_FORMAT_32_R: case EXP_FORMAT_32_GR: case EXP_FORMAT_32_ABGR: { - if (expFmt == EXP_FORMAT_32_R) { + if (expFmt == EXP_FORMAT_32_R) compCount = 1; - channelWriteMask = 0x1; - } else if (expFmt == EXP_FORMAT_32_GR) { + else if (expFmt == EXP_FORMAT_32_GR) compCount = compCount >= 2 ? 
2 : 1; - channelWriteMask = 0x3; - } else { - channelWriteMask = 0xF; - } for (unsigned idx = 0; idx < compCount; ++idx) { unsigned compMask = 1 << idx; - if (compMask & channelWriteMask) { - exports[idx] = convertToFloat(comps[idx], signedness, builder); - exportMask |= compMask; - } + exports[idx] = convertToFloat(comps[idx], signedness, builder); + exportMask |= compMask; } break; } case EXP_FORMAT_32_AR: { - channelWriteMask = 0x9; - if (1 & channelWriteMask) { - exports[0] = convertToFloat(comps[0], signedness, builder); - exportMask = 1; - } + exports[0] = convertToFloat(comps[0], signedness, builder); + exportMask = 1; if (compCount == 4) { - if (0x8 & channelWriteMask) { - exports[1] = convertToFloat(comps[3], signedness, builder); - exportMask |= 0x2; - } + exports[1] = convertToFloat(comps[3], signedness, builder); + exportMask |= 0x2; compCount = 2; } else { compCount = 1; @@ -187,9 +175,12 @@ Value *FragColorExport::handleColorExportInstructions(Value *output, unsigned hw extractElements(output, builder, comps); // re-pack for (unsigned idx = 0; idx < compactCompCount; ++idx) { - unsigned origIdx = 2 * idx; - exports[idx] = builder.CreateInsertElement(undefFloat16x2, comps[origIdx], builder.getInt32(0)); - exports[idx] = builder.CreateInsertElement(exports[idx], comps[origIdx + 1], builder.getInt32(1)); + unsigned compId1 = 2 * idx; + unsigned compId2 = compId1 + 1; + exports[idx] = builder.CreateInsertElement(undefFloat16x2, comps[compId1], builder.getInt32(0)); + if (!comps[compId2]) + comps[compId2] = undefHalf; + exports[idx] = builder.CreateInsertElement(exports[idx], comps[compId2], builder.getInt32(1)); } } else { if (outputTy->isIntOrIntVectorTy()) @@ -199,9 +190,12 @@ Value *FragColorExport::handleColorExportInstructions(Value *output, unsigned hw extractElements(output, builder, comps); for (unsigned idx = 0; idx < compactCompCount; ++idx) { - unsigned origIdx = 2 * idx; + unsigned compId1 = 2 * idx; + unsigned compId2 = compId1 + 1; + if 
(!comps[compId2]) + comps[compId2] = undefHalf; exports[idx] = builder.CreateIntrinsic(FixedVectorType::get(builder.getHalfTy(), 2), - Intrinsic::amdgcn_cvt_pkrtz, {comps[origIdx], comps[origIdx + 1]}); + Intrinsic::amdgcn_cvt_pkrtz, {comps[compId1], comps[compId2]}); } } break; @@ -227,8 +221,12 @@ Value *FragColorExport::handleColorExportInstructions(Value *output, unsigned hw exports[0] = exports[1] = undefFloat16x2; exportMask = compCount > 2 ? 0xF : 0x3; for (unsigned idx = 0; idx < compactCompCount; idx++) { + unsigned compId1 = 2 * idx; + unsigned compId2 = compId1 + 1; + if (!comps[compId2]) + comps[compId2] = undefHalf; Value *packedComps = builder.CreateIntrinsic(FixedVectorType::get(builder.getInt16Ty(), 2), cvtIntrinsic, - {comps[2 * idx], comps[2 * idx + 1]}); + {comps[compId1], comps[compId2]}); exports[idx] = builder.CreateBitCast(packedComps, FixedVectorType::get(builder.getHalfTy(), 2)); } break; @@ -239,24 +237,25 @@ Value *FragColorExport::handleColorExportInstructions(Value *output, unsigned hw } } - if (m_lgcContext->getTargetInfo().getGfxIpVersion().major >= 11) { - if (isDualSource) { - // Save them for later dual-source-swizzle - m_blendSourceChannels = exportTy->isHalfTy() ? (compCount + 1) / 2 : compCount; - assert(hwColorExport <= 1); - m_blendSources[hwColorExport].append(exports.begin(), exports.end()); - return nullptr; - } else if (exportTy->isHalfTy()) { - // GFX11 removes compressed export, simply use 32bit-data export. - exportMask = 0; - const unsigned compactCompCount = (compCount + 1) / 2; - for (unsigned idx = 0; idx < compactCompCount; ++idx) { - exports[idx] = builder.CreateBitCast(exports[idx], builder.getFloatTy()); - exportMask |= 1 << idx; - } - for (unsigned idx = compactCompCount; idx < 4; ++idx) - exports[idx] = undefFloat; + if (isDualSource) { + assert(m_lgcContext->getTargetInfo().getGfxIpVersion().major >= 11); + // Save them for later dual-source-swizzle + m_blendSourceChannels = exportTy->isHalfTy() ? 
(compCount + 1) / 2 : compCount; + assert(hwColorExport <= 1); + m_blendSources[hwColorExport].append(exports.begin(), exports.end()); + return nullptr; + } + + if (m_lgcContext->getTargetInfo().getGfxIpVersion().major >= 11 && exportTy->isHalfTy()) { + // GFX11 removes compressed export, simply use 32bit-data export. + exportMask = 0; + const unsigned compactCompCount = (compCount + 1) / 2; + for (unsigned idx = 0; idx < compactCompCount; ++idx) { + exports[idx] = builder.CreateBitCast(exports[idx], builder.getFloatTy()); + exportMask |= 1 << idx; } + for (unsigned idx = compactCompCount; idx < 4; ++idx) + exports[idx] = undefFloat; } Value *exportCall = nullptr; @@ -858,11 +857,12 @@ Value *FragColorExport::dualSourceSwizzle(unsigned waveSize, BuilderBase &builde // Update the color export information when enableFragColor is set. // // @param key : Color export info. -// @param originExpinfo : The original color export information for each color export in no particular order.// +// @param originExpinfo : The original color export information for each color export in no particular order. +// @param needMrt0a: The flag to tell MRT0.a is required. // @param pCbShaderMask: The cbShaderMask after update color export information // @param [out] outExpinfo : The updated color export information when enableFragColor is true. void FragColorExport::updateColorExportInfoWithBroadCastInfo(const Key &key, ArrayRef originExpinfo, - SmallVector &outExpinfo, + bool needMrt0a, SmallVector &outExpinfo, unsigned *pCbShaderMask) { // As enableFragColor will only be enabled by OGL, so it will not consider on the dualSource cases. 
SmallVector broadCastInfo; @@ -879,14 +879,8 @@ void FragColorExport::updateColorExportInfoWithBroadCastInfo(const Key &key, Arr if (exp.hwColorTarget == MaxColorTargets) continue; const unsigned channelWriteMask = key.channelWriteMask[exp.location]; - unsigned gfxIp = m_lgcContext->getTargetInfo().getGfxIpVersion().major; - bool needUpdateMask = false; - if (exp.location == 0 || gfxIp > 10) { - needUpdateMask = - (key.expFmt[exp.location] != 0) && (channelWriteMask > 0 || key.colorExportState.alphaToCoverageEnable); - } else { - needUpdateMask = (key.expFmt[exp.location] != 0) && (channelWriteMask > 0); - } + unsigned expFormat = key.expFmt[exp.location]; + bool needUpdateMask = expFormat != 0 && (channelWriteMask > 0 || (exp.location == 0 && needMrt0a)); if (needUpdateMask) { // For dualSource, the cbShaderMask will only be valid for location=0, other locations setting will be // redundant. ToDo: this point can be optimized when use different ShaderMaskMetaKey or compile different @@ -914,6 +908,7 @@ void FragColorExport::generateExportInstructions(ArrayRef info, // MRTZ export comes first if it exists (this is a HW requirement on gfx11+ and an optional good idea on earlier HW). // We make the assume here that it is also first in the info list. 
+ bool needMrt0a = key.colorExportState.alphaToCoverageEnable; if (!info.empty() && info[0].hwColorTarget == MaxColorTargets) { unsigned depthMask = info[0].location; @@ -933,6 +928,7 @@ void FragColorExport::generateExportInstructions(ArrayRef info, if (alpha->getType()->isIntegerTy()) alpha = builder.CreateBitCast(alpha, builder.getFloatTy()); depthMask |= 0x8; + needMrt0a = false; break; } } @@ -974,7 +970,7 @@ void FragColorExport::generateExportInstructions(ArrayRef info, BasicBlock *dualSourceBlock = nullptr; BasicBlock *normalExportBlock = nullptr; - updateColorExportInfoWithBroadCastInfo(key, info, finalExpInfo, &cbShaderMask); + updateColorExportInfoWithBroadCastInfo(key, info, needMrt0a, finalExpInfo, &cbShaderMask); if (key.colorExportState.dualSourceBlendDynamicEnable && (gfxip >= 11)) { // For dynamiceState, whether do dualSourceBlend will depend on the user data. @@ -1003,21 +999,21 @@ void FragColorExport::generateExportInstructions(ArrayRef info, for (unsigned idx = 0; idx < 2; idx++) { auto infoIt = llvm::find_if(finalExpInfo, [&](const ColorExportInfo &info) { return info.location == idx; }); if (infoIt != finalExpInfo.end()) { - auto dualExpFmt = static_cast(key.dualExpFmt[idx]); + auto dualExpFmt = static_cast(key.expFmt[idx]); const unsigned channelWriteMask = key.channelWriteMask[0]; - bool needExpInst = - (dualExpFmt != EXP_FORMAT_ZERO) && (channelWriteMask > 0 || key.colorExportState.alphaToCoverageEnable); - if (needExpInst) { + if (dualExpFmt != EXP_FORMAT_ZERO && (channelWriteMask > 0 || (infoIt->location == 0 && needMrt0a))) { // Collect info for dualSourceBlend and save then in m_blendSources, so set the last parameter=true; handleColorExportInstructions(values[infoIt->hwColorTarget], idx, builder, dualExpFmt, infoIt->isSigned, - channelWriteMask, true); + true); finalExportFormats.push_back(dualExpFmt); } } } - lastExport = dualSourceSwizzle(key.waveSize, builder); - FragColorExport::setDoneFlag(lastExport, builder); + if 
(m_blendSourceChannels > 0) { + lastExport = dualSourceSwizzle(key.waveSize, builder); + FragColorExport::setDoneFlag(lastExport, builder); + } builder.CreateRetVoid(); } @@ -1034,12 +1030,11 @@ void FragColorExport::generateExportInstructions(ArrayRef info, assert(infoIt->hwColorTarget < MaxColorTargets); const unsigned channelWriteMask = key.channelWriteMask[location]; auto expFmt = static_cast(key.expFmt[location]); - bool needExpInst = - (expFmt != EXP_FORMAT_ZERO) && (channelWriteMask > 0 || key.colorExportState.alphaToCoverageEnable); + bool needExpInst = (expFmt != 0) && (channelWriteMask > 0 || (location == 0 && needMrt0a)); if (needExpInst) { // Don't collect info for dualSourceBlend just do normal color export, so set the last parameter=false; lastExport = handleColorExportInstructions(values[infoIt->hwColorTarget], hwColorExport, builder, expFmt, - infoIt->isSigned, channelWriteMask, false); + infoIt->isSigned, false); finalExportFormats.push_back(expFmt); ++hwColorExport; } @@ -1144,31 +1139,30 @@ FragColorExport::Key FragColorExport::computeKey(ArrayRef infos infos = infos.drop_front(1); } + // DualSourceBlendDynamicEnable has been concluded from driver and compiler sides. + // 1. Driver set dualSourceBlendDynamicEnable = true when dynamicDualSourceState + // feature is enable + // 2. With Decoration "location=0, index=0(or 1)" in Shader. + // Only in this way, the DualSourceBlendDynamicEnable can be set true finally. 
+ bool isDynamicDualSource = key.colorExportState.dualSourceBlendDynamicEnable; + if (key.enableFragColor) { auto &expInfo = infos[0]; assert(expInfo.ty != nullptr); for (unsigned location = 0; location < MaxColorTargets; ++location) { - if (pipelineState->getColorExportFormat(location).dfmt != BufDataFormatInvalid) { - key.expFmt[location] = pipelineState->computeExportFormat(expInfo.ty, location); - key.channelWriteMask[location] = pipelineState->getColorExportFormat(location).channelWriteMask; + if (pipelineState->getColorExportFormat(location, isDynamicDualSource).dfmt != BufDataFormatInvalid) { + key.expFmt[location] = pipelineState->computeExportFormat(expInfo.ty, location, isDynamicDualSource); + key.channelWriteMask[location] = + pipelineState->getColorExportFormat(location, isDynamicDualSource).channelWriteMask; } } } else { for (auto &info : infos) { - key.expFmt[info.location] = pipelineState->computeExportFormat(info.ty, info.location); - key.channelWriteMask[info.location] = pipelineState->getColorExportFormat(info.location).channelWriteMask; + key.expFmt[info.location] = pipelineState->computeExportFormat(info.ty, info.location, isDynamicDualSource); + key.channelWriteMask[info.location] = + pipelineState->getColorExportFormat(info.location, isDynamicDualSource).channelWriteMask; } } - if ((pipelineState->getTargetInfo().getGfxIpVersion().major >= 11) && - (key.colorExportState.dualSourceBlendEnable || key.colorExportState.dualSourceBlendDynamicEnable)) { - auto info0It = llvm::find_if(infos, [&](const ColorExportInfo &info) { return info.location == 0; }); - assert(info0It != infos.end()); - key.dualExpFmt[0] = pipelineState->computeExportFormat(info0It->ty, 0, true); - auto info1It = llvm::find_if(infos, [&](const ColorExportInfo &info) { return info.location == 1; }); - if (info1It != infos.end()) - key.dualExpFmt[1] = pipelineState->computeExportFormat(info1It->ty, 1, true); - } - return key; } diff --git a/lgc/patch/Gfx9Chip.cpp 
b/lgc/patch/Gfx9Chip.cpp index 2ae401747c..0ac1173deb 100644 --- a/lgc/patch/Gfx9Chip.cpp +++ b/lgc/patch/Gfx9Chip.cpp @@ -132,7 +132,7 @@ PrimShaderRegConfig::PrimShaderRegConfig(GfxIpVersion gfxIp) { INIT_REG(VGT_GS_ONCHIP_CNTL); // Special registers, having different register IDs - if (gfxIp.major == 9 || gfxIp.major == 10) { + if (gfxIp.major == 10) { INIT_REG_GFX9_10(gfxIp.major, VGT_GS_OUT_PRIM_TYPE); } else if (gfxIp.major == 11) { INIT_REG_GFX11(gfxIp.major, VGT_GS_OUT_PRIM_TYPE); diff --git a/lgc/patch/Gfx9ConfigBuilder.cpp b/lgc/patch/Gfx9ConfigBuilder.cpp index 952cb11b27..30d48f08ba 100644 --- a/lgc/patch/Gfx9ConfigBuilder.cpp +++ b/lgc/patch/Gfx9ConfigBuilder.cpp @@ -1031,11 +1031,7 @@ void ConfigBuilder::buildLsHsRegConfig(ShaderStageEnum shaderStage1, ShaderStage ldsSizeInDwords = alignTo(ldsSizeInDwords, ldsSizeDwordGranularity); const unsigned ldsSize = ldsSizeInDwords >> ldsSizeDwordGranularityShift; - if (m_gfxIp.major == 9) { - SET_REG_GFX9_FIELD(&config->lsHsRegs, SPI_SHADER_PGM_RSRC2_HS, LDS_SIZE, ldsSize); - } else { - SET_REG_GFX10_PLUS_FIELD(&config->lsHsRegs, SPI_SHADER_PGM_RSRC2_HS, LDS_SIZE, ldsSize); - } + SET_REG_GFX10_PLUS_FIELD(&config->lsHsRegs, SPI_SHADER_PGM_RSRC2_HS, LDS_SIZE, ldsSize); if (m_gfxIp.major >= 11) { // Pixel wait sync+ @@ -1247,11 +1243,7 @@ void ConfigBuilder::buildEsGsRegConfig(ShaderStageEnum shaderStage1, ShaderStage const unsigned maxPrimsPerSubgroup = std::min(gsInstPrimsInSubgrp * maxVertOut, MaxGsThreadsPerSubgroup); - if (m_gfxIp.major == 9) { - SET_REG_FIELD(&config->esGsRegs, VGT_GS_MAX_PRIMS_PER_SUBGROUP, MAX_PRIMS_PER_SUBGROUP, maxPrimsPerSubgroup); - } else { - SET_REG_FIELD(&config->esGsRegs, GE_MAX_OUTPUT_PER_SUBGROUP, MAX_VERTS_PER_SUBGROUP, maxPrimsPerSubgroup); - } + SET_REG_FIELD(&config->esGsRegs, GE_MAX_OUTPUT_PER_SUBGROUP, MAX_VERTS_PER_SUBGROUP, maxPrimsPerSubgroup); setNumAvailSgprs(Util::Abi::HardwareStage::Gs, gsResUsage->numSgprsAvailable); setNumAvailVgprs(Util::Abi::HardwareStage::Gs, 
gsResUsage->numVgprsAvailable); diff --git a/lgc/patch/LowerCooperativeMatrix.cpp b/lgc/patch/LowerCooperativeMatrix.cpp index 8f0954e340..cf880b63a7 100644 --- a/lgc/patch/LowerCooperativeMatrix.cpp +++ b/lgc/patch/LowerCooperativeMatrix.cpp @@ -119,16 +119,14 @@ void LowerCooperativeMatrix::visitCallInst(CallInst &callInst) { auto mangledName = callee->getName(); if (mangledName.starts_with(lgcName::CooperativeMatrixLength)) { - auto layout = - static_cast(cast(callInst.getOperand(1))->getZExtValue()); + auto layout = static_cast(cast(callInst.getOperand(1))->getZExtValue()); callInst.replaceAllUsesWith(builder.getInt32(getLength(layout))); } else if (mangledName.starts_with(lgcName::CooperativeMatrixExtract)) { Value *matrix = callInst.getOperand(0); Value *index = callInst.getOperand(1); auto elemType = - static_cast(cast(callInst.getOperand(2))->getZExtValue()); - auto layout = - static_cast(cast(callInst.getOperand(3))->getZExtValue()); + static_cast(cast(callInst.getOperand(2))->getZExtValue()); + auto layout = static_cast(cast(callInst.getOperand(3))->getZExtValue()); Value *result = cooperativeMatrixExtract(builder, matrix, index, elemType, layout); result->takeName(&callInst); callInst.replaceAllUsesWith(result); @@ -137,18 +135,16 @@ void LowerCooperativeMatrix::visitCallInst(CallInst &callInst) { Value *value = callInst.getOperand(1); Value *index = callInst.getOperand(2); auto elemType = - static_cast(cast(callInst.getOperand(3))->getZExtValue()); - auto layout = - static_cast(cast(callInst.getOperand(4))->getZExtValue()); + static_cast(cast(callInst.getOperand(3))->getZExtValue()); + auto layout = static_cast(cast(callInst.getOperand(4))->getZExtValue()); Value *result = cooperativeMatrixInsert(builder, matrix, value, index, elemType, layout); result->takeName(&callInst); callInst.replaceAllUsesWith(result); } else if (mangledName.starts_with(lgcName::CooperativeMatrixFill)) { Value *value = callInst.getOperand(0); auto elemType = - 
static_cast(cast(callInst.getOperand(1))->getZExtValue()); - auto layout = - static_cast(cast(callInst.getOperand(2))->getZExtValue()); + static_cast(cast(callInst.getOperand(1))->getZExtValue()); + auto layout = static_cast(cast(callInst.getOperand(2))->getZExtValue()); Value *result = cooperativeMatrixFill(builder, value, elemType, layout); result->takeName(&callInst); callInst.replaceAllUsesWith(result); @@ -156,13 +152,13 @@ void LowerCooperativeMatrix::visitCallInst(CallInst &callInst) { Value *dataPtr = callInst.getOperand(0); Value *stride = callInst.getOperand(1); bool colMajor = cast(callInst.getOperand(2))->getZExtValue(); - Builder::CooperativeMatrixElementType elemType = - static_cast(cast(callInst.getOperand(3))->getZExtValue()); - Builder::CooperativeMatrixLayout layout = - static_cast(cast(callInst.getOperand(4))->getZExtValue()); + auto elemType = + static_cast(cast(callInst.getOperand(3))->getZExtValue()); + auto layout = static_cast(cast(callInst.getOperand(4))->getZExtValue()); unsigned memoryAccess = cast(callInst.getOperand(5))->getZExtValue(); + unsigned alignment = cast(callInst.getOperand(6))->getZExtValue(); - Value *loadVal = cooperativeMatrixLoadInternal(dataPtr, stride, colMajor, elemType, layout, memoryAccess, + Value *loadVal = cooperativeMatrixLoadInternal(dataPtr, stride, colMajor, elemType, layout, memoryAccess, alignment, callInst.getName(), &callInst); callInst.replaceAllUsesWith(loadVal); @@ -170,34 +166,32 @@ void LowerCooperativeMatrix::visitCallInst(CallInst &callInst) { Value *dataPtr = callInst.getOperand(0); Value *stride = callInst.getOperand(1); bool colMajor = cast(callInst.getOperand(2))->getZExtValue(); - Builder::CooperativeMatrixElementType elemType = - static_cast(cast(callInst.getOperand(3))->getZExtValue()); - Builder::CooperativeMatrixLayout layout = - static_cast(cast(callInst.getOperand(4))->getZExtValue()); + auto elemType = + static_cast(cast(callInst.getOperand(3))->getZExtValue()); + auto layout = 
static_cast(cast(callInst.getOperand(4))->getZExtValue()); unsigned memoryAccess = cast(callInst.getOperand(5))->getZExtValue(); - Value *vecVal = callInst.getOperand(6); + unsigned alignment = cast(callInst.getOperand(6))->getZExtValue(); + Value *vecVal = callInst.getOperand(7); - cooperativeMatrixStoreInternal(dataPtr, stride, colMajor, elemType, layout, memoryAccess, vecVal, + cooperativeMatrixStoreInternal(dataPtr, stride, colMajor, elemType, layout, memoryAccess, alignment, vecVal, callInst.getName(), &callInst); } else if (mangledName.starts_with(lgcName::CooperativeMatrixConvert)) { CastInst::CastOps castOp = static_cast(cast(callInst.getOperand(0))->getZExtValue()); Value *source = callInst.getOperand(1); - Builder::CooperativeMatrixElementType srcElemType = - static_cast(cast(callInst.getOperand(2))->getZExtValue()); - Builder::CooperativeMatrixElementType dstElemType = - static_cast(cast(callInst.getOperand(3))->getZExtValue()); - Builder::CooperativeMatrixLayout srcLayout = - static_cast(cast(callInst.getOperand(4))->getZExtValue()); - Builder::CooperativeMatrixLayout dstLayout = - static_cast(cast(callInst.getOperand(5))->getZExtValue()); + auto srcElemType = + static_cast(cast(callInst.getOperand(2))->getZExtValue()); + auto dstElemType = + static_cast(cast(callInst.getOperand(3))->getZExtValue()); + auto srcLayout = static_cast(cast(callInst.getOperand(4))->getZExtValue()); + auto dstLayout = static_cast(cast(callInst.getOperand(5))->getZExtValue()); Value *resultVal = cooperativeMatrixConvert(castOp, source, srcElemType, dstElemType, srcLayout, dstLayout, callInst.getName(), &callInst); if ((cast(resultVal->getType())->getNumElements() == 4) && - (dstLayout == Builder::CooperativeMatrixLayout::AccumulatorMatrixLayout || - dstLayout == Builder::CooperativeMatrixLayout::Gfx10Accumulator16bitMatrixLayout || - dstLayout == Builder::CooperativeMatrixLayout::Gfx10AccumulatorMatrixLayout)) { + (dstLayout == CooperativeMatrixLayout::AccumulatorMatrixLayout 
|| + dstLayout == CooperativeMatrixLayout::Gfx10Accumulator16bitMatrixLayout || + dstLayout == CooperativeMatrixLayout::Gfx10AccumulatorMatrixLayout)) { // for wave64 needs shuffleVector from V4 to V8 as frontend will always recognize V8 not care wave32 or wave64 resultVal = builder.CreateShuffleVector(resultVal, PoisonValue::get(resultVal->getType()), ArrayRef{0, 1, 2, 3, 4, 5, 6, 7}); @@ -206,23 +200,21 @@ void LowerCooperativeMatrix::visitCallInst(CallInst &callInst) { } else if (mangledName.starts_with(lgcName::CooperativeMatrixTranspose)) { Value *matrix = callInst.getOperand(0); - Builder::CooperativeMatrixElementType elemType = - static_cast(cast(callInst.getOperand(1))->getZExtValue()); - Builder::CooperativeMatrixLayout srcLayout = - static_cast(cast(callInst.getOperand(2))->getZExtValue()); + auto elemType = + static_cast(cast(callInst.getOperand(1))->getZExtValue()); + auto srcLayout = static_cast(cast(callInst.getOperand(2))->getZExtValue()); Value *resultVal = cooperativeMatrixTranspose(matrix, elemType, srcLayout, callInst.getName(), &callInst); callInst.replaceAllUsesWith(resultVal); } else if (mangledName.starts_with(lgcName::CooperativeMatrixBinOp)) { - Builder::CooperativeMatrixArithOp coopMatArithOp = - static_cast(cast(callInst.getOperand(0))->getZExtValue()); + CooperativeMatrixArithOp coopMatArithOp = + static_cast(cast(callInst.getOperand(0))->getZExtValue()); Value *lhs = callInst.getOperand(1); Value *rhs = callInst.getOperand(2); - Builder::CooperativeMatrixElementType elemType = - static_cast(cast(callInst.getOperand(3))->getZExtValue()); - Builder::CooperativeMatrixLayout srcLayout = - static_cast(cast(callInst.getOperand(4))->getZExtValue()); + auto elemType = + static_cast(cast(callInst.getOperand(3))->getZExtValue()); + auto srcLayout = static_cast(cast(callInst.getOperand(4))->getZExtValue()); Value *resultVal = cooperativeMatrixBinaryOp(coopMatArithOp, lhs, rhs, elemType, srcLayout, callInst.getName(), &callInst); @@ -231,10 +223,9 
@@ void LowerCooperativeMatrix::visitCallInst(CallInst &callInst) { } else if (mangledName.starts_with(lgcName::CooperativeMatrixTimesScalar)) { Value *matrix = callInst.getOperand(0); Value *scalar = callInst.getOperand(1); - Builder::CooperativeMatrixElementType elemType = - static_cast(cast(callInst.getOperand(2))->getZExtValue()); - Builder::CooperativeMatrixLayout srcLayout = - static_cast(cast(callInst.getOperand(3))->getZExtValue()); + auto elemType = + static_cast(cast(callInst.getOperand(2))->getZExtValue()); + auto srcLayout = static_cast(cast(callInst.getOperand(3))->getZExtValue()); Value *resultVal = coopMatrixTimesScalar(matrix, scalar, elemType, srcLayout, callInst.getName(), &callInst); callInst.replaceAllUsesWith(resultVal); @@ -247,10 +238,10 @@ void LowerCooperativeMatrix::visitCallInst(CallInst &callInst) { bool isSignedB = cast(callInst.getOperand(4))->getZExtValue(); bool isSatOrOpsel = cast(callInst.getOperand(5))->getZExtValue(); bool isTied = cast(callInst.getOperand(6))->getZExtValue(); - Builder::CooperativeMatrixElementType accumElemType = - static_cast(cast(callInst.getOperand(7))->getZExtValue()); - Builder::CooperativeMatrixElementType factorElemType = - static_cast(cast(callInst.getOperand(8))->getZExtValue()); + auto accumElemType = + static_cast(cast(callInst.getOperand(7))->getZExtValue()); + auto factorElemType = + static_cast(cast(callInst.getOperand(8))->getZExtValue()); Value *resultVal = cooperativeMatrixMulAdd(matrixA, matrixB, matrixC, isSignedA, isSignedB, isSatOrOpsel, isTied, accumElemType, factorElemType, callInst.getName(), &callInst); callInst.replaceAllUsesWith(resultVal); @@ -274,16 +265,16 @@ void LowerCooperativeMatrix::visitCallInst(CallInst &callInst) { // Get the "length" of a matrix of the given layout, i.e. the number of matrix components stored per lane. 
// // @param layout : the matrix layout -unsigned LowerCooperativeMatrix::getLength(Builder::CooperativeMatrixLayout layout) const { +unsigned LowerCooperativeMatrix::getLength(CooperativeMatrixLayout layout) const { auto waveSize = m_pipelineState->getShaderWaveSize(m_shaderStage); switch (layout) { - case BuilderCommon::FactorMatrixLayout: + case CooperativeMatrixLayout::FactorMatrixLayout: return 16; - case BuilderCommon::AccumulatorMatrixLayout: { + case CooperativeMatrixLayout::AccumulatorMatrixLayout: { return waveSize == 32 ? 8 : 4; } - case BuilderCommon::Gfx10AccumulatorMatrixLayout: - case BuilderCommon::Gfx10Accumulator16bitMatrixLayout: + case CooperativeMatrixLayout::Gfx10AccumulatorMatrixLayout: + case CooperativeMatrixLayout::Gfx10Accumulator16bitMatrixLayout: return 8; default: llvm_unreachable("unhandled matrix layout"); @@ -296,26 +287,25 @@ unsigned LowerCooperativeMatrix::getLength(Builder::CooperativeMatrixLayout layo // @param elemType : the matrix element type // @param layout : the matrix layout // @returns : the type properties -LowerCooperativeMatrix::TypeProperties -LowerCooperativeMatrix::getTypeProperties(Builder::CooperativeMatrixElementType elemType, - Builder::CooperativeMatrixLayout layout) const { +LowerCooperativeMatrix::TypeProperties LowerCooperativeMatrix::getTypeProperties(CooperativeMatrixElementType elemType, + CooperativeMatrixLayout layout) const { TypeProperties props; props.matrixElementStride = 1; switch (elemType) { - case Builder::CooperativeMatrixElementType::Float32: - case Builder::CooperativeMatrixElementType::Int32: + case CooperativeMatrixElementType::Float32: + case CooperativeMatrixElementType::Int32: props.numMatrixElements = 8; props.numMatrixWords = 8; break; - case Builder::CooperativeMatrixElementType::Float16: - case Builder::CooperativeMatrixElementType::Float16Packed: - case Builder::CooperativeMatrixElementType::Int16: + case CooperativeMatrixElementType::Float16: + case 
CooperativeMatrixElementType::Float16Packed: + case CooperativeMatrixElementType::Int16: props.numMatrixElements = 16; props.numMatrixWords = 8; break; - case Builder::CooperativeMatrixElementType::Int8: + case CooperativeMatrixElementType::Int8: props.numMatrixElements = 16; props.numMatrixWords = 4; break; @@ -324,22 +314,20 @@ LowerCooperativeMatrix::getTypeProperties(Builder::CooperativeMatrixElementType } auto waveSize = m_pipelineState->getShaderWaveSize(m_shaderStage); - if (layout == Builder::CooperativeMatrixLayout::FactorMatrixLayout) { - assert(elemType != Builder::CooperativeMatrixElementType::Float32 && - elemType != Builder::CooperativeMatrixElementType::Int32); + if (layout == CooperativeMatrixLayout::FactorMatrixLayout) { + assert(elemType != CooperativeMatrixElementType::Float32 && elemType != CooperativeMatrixElementType::Int32); props.numFlatElements = 16; - } else if (layout == Builder::CooperativeMatrixLayout::AccumulatorMatrixLayout) { - if (elemType == Builder::CooperativeMatrixElementType::Float16 || - elemType == Builder::CooperativeMatrixElementType::Int16) { + } else if (layout == CooperativeMatrixLayout::AccumulatorMatrixLayout) { + if (elemType == CooperativeMatrixElementType::Float16 || elemType == CooperativeMatrixElementType::Int16) { props.matrixElementStride = 2; } - if (elemType == Builder::CooperativeMatrixElementType::Float16Packed) { + if (elemType == CooperativeMatrixElementType::Float16Packed) { props.numFlatElements = waveSize == 32 ? 16 : 8; } else { props.numFlatElements = waveSize == 32 ? 
8 : 4; } - } else if (layout == Builder::CooperativeMatrixLayout::Gfx10AccumulatorMatrixLayout || - layout == Builder::CooperativeMatrixLayout::Gfx10Accumulator16bitMatrixLayout) { + } else if (layout == CooperativeMatrixLayout::Gfx10AccumulatorMatrixLayout || + layout == CooperativeMatrixLayout::Gfx10Accumulator16bitMatrixLayout) { props.numFlatElements = 8; } else { llvm_unreachable("Unsupported layout!"); @@ -356,8 +344,8 @@ LowerCooperativeMatrix::getTypeProperties(Builder::CooperativeMatrixElementType // @param elemType : Element type for the matrix. // @param layout : Identify whether this matrix is A/B or C/D Value *LowerCooperativeMatrix::convFlatVecToCoopMatrixVec(BuilderCommon &builder, Value *vecValue, - Builder::CooperativeMatrixElementType elemType, - Builder::CooperativeMatrixLayout layout) { + CooperativeMatrixElementType elemType, + CooperativeMatrixLayout layout) { auto props = getTypeProperties(elemType, layout); if (props.numMatrixElements > props.numFlatElements) { @@ -382,8 +370,8 @@ Value *LowerCooperativeMatrix::convFlatVecToCoopMatrixVec(BuilderCommon &builder // @param elemType : Element type for the matrix. 
// @param layout : Identify whether this matrix is A/B or C/D Value *LowerCooperativeMatrix::convCoopMatrixVecToFlatVec(BuilderCommon &builder, Value *matrixValue, - Builder::CooperativeMatrixElementType elemType, - Builder::CooperativeMatrixLayout layout) { + CooperativeMatrixElementType elemType, + CooperativeMatrixLayout layout) { auto props = getTypeProperties(elemType, layout); Type *flatType = FixedVectorType::get(builder.transCooperativeMatrixElementType(elemType), props.numMatrixElements); @@ -409,9 +397,8 @@ Value *LowerCooperativeMatrix::convCoopMatrixVecToFlatVec(BuilderCommon &builder // @param isColMajor : Identify the order for the data stored in memory, col-major/row-major // @param insertPos : Where to insert the instruction LowerCooperativeMatrix::ComputeAddressInfo -LowerCooperativeMatrix::computeAddressing(Builder::CooperativeMatrixLayout layout, - Builder::CooperativeMatrixElementType elemType, int waveSize, Value *stride, - bool isColMajor, Instruction *insertPos) { +LowerCooperativeMatrix::computeAddressing(CooperativeMatrixLayout layout, CooperativeMatrixElementType elemType, + int waveSize, Value *stride, bool isColMajor, Instruction *insertPos) { BuilderBase builder(*m_context); builder.SetInsertPoint(insertPos); Value *threadId = getLaneNumber(builder); @@ -422,16 +409,16 @@ LowerCooperativeMatrix::computeAddressing(Builder::CooperativeMatrixLayout layou addrInfo.microCount = 1; (void)elemType; - if (layout == Builder::CooperativeMatrixLayout::FactorMatrixLayout) { + if (layout == CooperativeMatrixLayout::FactorMatrixLayout) { rowOffsetInFirstVgpr = builder.getInt32(0); addrInfo.macroStep = builder.getInt32(1); - } else if (layout == Builder::CooperativeMatrixLayout::AccumulatorMatrixLayout) { + } else if (layout == CooperativeMatrixLayout::AccumulatorMatrixLayout) { rowOffsetInFirstVgpr = builder.CreateUDiv(threadId, builder.getInt32(16)); addrInfo.macroStep = (waveSize == 64 ? 
builder.getInt32(4) : builder.getInt32(2)); - } else if (layout == Builder::CooperativeMatrixLayout::Gfx10AccumulatorMatrixLayout) { + } else if (layout == CooperativeMatrixLayout::Gfx10AccumulatorMatrixLayout) { rowOffsetInFirstVgpr = builder.CreateUDiv(builder.CreateSRem(threadId, builder.getInt32(32)), builder.getInt32(16)); addrInfo.macroStep = builder.getInt32(2); - } else if (layout == Builder::CooperativeMatrixLayout::Gfx10Accumulator16bitMatrixLayout) { + } else if (layout == CooperativeMatrixLayout::Gfx10Accumulator16bitMatrixLayout) { // For 16bit@Accumulator@gfx10:lane_0: {0_0,1_0,4_0,5_0,8_0,9_0,12_0,13_0} // lane_16: {2_0,3_0,6_0,7_0,10_0,11_0,14_0,15_0} on lane_16. Value *laneGroupIdx = builder.CreateUDiv(threadId, builder.getInt32(16)); @@ -464,14 +451,15 @@ LowerCooperativeMatrix::computeAddressing(Builder::CooperativeMatrixLayout layou // @param elemType : The element type for the matrix // @param layout : This is identify for factor(A/B) or accumulator(C) for 16 bit element matrix. // @param memoryAccess : The memory operands which provide:isVolatile/isTemporal/isCoherent +// @param alignment: Alignment for the memory access operations. // additional operands, maybe volatile/Aligned/Nontemporal/MakePointerAvailable // /MakePointerVisible/NonPrivatePointer usded by CooperativeMatrix Load/Store. // @param instName : Name to give instruction(s). 
// @param insertPos : Where to insert the instruction Value *LowerCooperativeMatrix::cooperativeMatrixLoadInternal(Value *dataPtr, Value *stride, bool isColMajor, - Builder::CooperativeMatrixElementType elemType, - Builder::CooperativeMatrixLayout layout, - unsigned memoryAccess, const Twine &instName, + CooperativeMatrixElementType elemType, + CooperativeMatrixLayout layout, unsigned memoryAccess, + unsigned alignment, const Twine &instName, Instruction *insertPos) { BuilderBase builder(*m_context); builder.SetInsertPoint(insertPos); @@ -489,22 +477,31 @@ Value *LowerCooperativeMatrix::cooperativeMatrixLoadInternal(Value *dataPtr, Val stride = builder.CreateExactSDiv(stride, builder.getInt32(dataBitwidth / 8)); // calc memoryAccess - bool isVolatile = memoryAccess & Builder::MemoryAccessVolatileMask; - bool isCoherent = memoryAccess & Builder::MemoryAccessCoherentMask; - bool isTemporal = memoryAccess & Builder::MemoryAccessTemporalMask; + bool isVolatile = memoryAccess & (unsigned)(CooperativeMatrixMemoryAccess::MemoryAccessVolatileMask); + bool isCoherent = memoryAccess & (unsigned)(CooperativeMatrixMemoryAccess::MemoryAccessCoherentMask); + bool isTemporal = memoryAccess & (unsigned)(CooperativeMatrixMemoryAccess::MemoryAccessTemporalMask); auto props = getTypeProperties(elemType, layout); auto addrInfo = computeAddressing(layout, elemType, waveSize, stride, isColMajor, insertPos); Value *vecVal = PoisonValue::get(FixedVectorType::get(elemTy, props.numFlatElements)); for (unsigned idx = 0; idx < props.numFlatElements; ++idx) { - Value *offset = builder.CreateAdd( - addrInfo.base, builder.CreateMul(addrInfo.macroStep, builder.getInt32(idx / addrInfo.microCount))); - offset = - builder.CreateAdd(offset, builder.CreateMul(addrInfo.microStep, builder.getInt32(idx % addrInfo.microCount))); - - Value *elePtr = builder.CreateGEP(elemTy, dataPtr, offset); - Value *eleVal = builder.CreateLoad(elemTy, elePtr, isVolatile, instName); + Value *macroOffset = 
builder.CreateMul(addrInfo.macroStep, builder.getInt32(idx / addrInfo.microCount)); + Value *microOffset = builder.CreateMul(addrInfo.microStep, builder.getInt32(idx % addrInfo.microCount)); + Value *offsetInRowCol = builder.CreateAdd(macroOffset, microOffset); + Value *offsetInMatrix = builder.CreateAdd(addrInfo.base, offsetInRowCol); + Value *elePtr = builder.CreateGEP(elemTy, dataPtr, offsetInMatrix); + Value *eleVal = nullptr; + if (isColMajor) { + // For colMajor@B/C and rowMajor@A, as the elements of one lane are continuous, add the alignments for + // merging load/store instructions on backend later. + unsigned constantOffsetInRowCol = cast(offsetInRowCol)->getZExtValue(); + Align compAlignment = commonAlignment(Align(alignment), constantOffsetInRowCol); + eleVal = builder.CreateAlignedLoad(elemTy, elePtr, compAlignment, isVolatile, instName); + } else { + // For rowMajor@B/C and colMajor@A, as the elements of one lane aren't continuous, no alignments needed. + eleVal = builder.CreateLoad(elemTy, elePtr, isVolatile, instName); + } if (isCoherent && !(addrSpace == ADDR_SPACE_LOCAL && dataBitwidth < 32)) cast(eleVal)->setAtomic(AtomicOrdering::Unordered); if (isTemporal) @@ -532,10 +529,10 @@ Value *LowerCooperativeMatrix::cooperativeMatrixLoadInternal(Value *dataPtr, Val // @param instName : Name to give instruction(s). 
// @param insertPos : Where to insert the instruction void LowerCooperativeMatrix::cooperativeMatrixStoreInternal(Value *dataPtr, Value *stride, bool isColMajor, - Builder::CooperativeMatrixElementType elemType, - Builder::CooperativeMatrixLayout layout, - unsigned memoryAccess, Value *&vecVal, - const Twine &instName, Instruction *insertPos) { + CooperativeMatrixElementType elemType, + CooperativeMatrixLayout layout, unsigned memoryAccess, + unsigned alignment, Value *&vecVal, const Twine &instName, + Instruction *insertPos) { BuilderBase builder(*m_context); builder.SetInsertPoint(insertPos); @@ -552,9 +549,9 @@ void LowerCooperativeMatrix::cooperativeMatrixStoreInternal(Value *dataPtr, Valu stride = builder.CreateExactSDiv(stride, builder.getInt32(dataBitwidth / 8)); // calc memoryAccess - bool isVolatile = memoryAccess & Builder::MemoryAccessVolatileMask; - bool isCoherent = memoryAccess & Builder::MemoryAccessCoherentMask; - bool isTemporal = memoryAccess & Builder::MemoryAccessTemporalMask; + bool isVolatile = memoryAccess & (unsigned)(CooperativeMatrixMemoryAccess::MemoryAccessVolatileMask); + bool isCoherent = memoryAccess & (unsigned)(CooperativeMatrixMemoryAccess::MemoryAccessCoherentMask); + bool isTemporal = memoryAccess & (unsigned)(CooperativeMatrixMemoryAccess::MemoryAccessTemporalMask); auto props = getTypeProperties(elemType, layout); auto addrInfo = computeAddressing(layout, elemType, waveSize, stride, isColMajor, insertPos); @@ -562,14 +559,21 @@ void LowerCooperativeMatrix::cooperativeMatrixStoreInternal(Value *dataPtr, Valu vecVal = convCoopMatrixVecToFlatVec(builder, vecVal, elemType, layout); for (unsigned idx = 0; idx < props.numFlatElements; ++idx) { - Value *offset = builder.CreateAdd( - addrInfo.base, builder.CreateMul(addrInfo.macroStep, builder.getInt32(idx / addrInfo.microCount))); - offset = - builder.CreateAdd(offset, builder.CreateMul(addrInfo.microStep, builder.getInt32(idx % addrInfo.microCount))); - Value *elePtr = 
builder.CreateGEP(elemTy, dataPtr, offset); + Value *macroOffset = builder.CreateMul(addrInfo.macroStep, builder.getInt32(idx / addrInfo.microCount)); + Value *microOffset = builder.CreateMul(addrInfo.microStep, builder.getInt32(idx % addrInfo.microCount)); + Value *offsetInRowCol = builder.CreateAdd(macroOffset, microOffset); + Value *offsetInMatrix = builder.CreateAdd(addrInfo.base, offsetInRowCol); + Value *elePtr = builder.CreateGEP(elemTy, dataPtr, offsetInMatrix); Value *oneElement = builder.CreateExtractElement(vecVal, idx); - StoreInst *st = builder.CreateStore(oneElement, elePtr, isVolatile); - + StoreInst *st = nullptr; + if (isColMajor) { + // Add alignment same with Load + unsigned constantOffsetInRowCol = cast(offsetInRowCol)->getZExtValue(); + Align compAlignment = commonAlignment(Align(alignment), constantOffsetInRowCol); + st = builder.CreateAlignedStore(oneElement, elePtr, compAlignment, isVolatile); + } else { + st = builder.CreateStore(oneElement, elePtr, isVolatile); + } if (isCoherent && !(addrSpace == ADDR_SPACE_LOCAL && dataBitwidth < 32)) st->setAtomic(AtomicOrdering::Unordered); if (isTemporal) @@ -586,14 +590,14 @@ void LowerCooperativeMatrix::cooperativeMatrixStoreInternal(Value *dataPtr, Valu // @param elemType : the matrix element type // @param layout : the matrix layout type Value *LowerCooperativeMatrix::cooperativeMatrixExtract(BuilderCommon &builder, Value *matrix, Value *index, - Builder::CooperativeMatrixElementType elemType, - Builder::CooperativeMatrixLayout layout) { + CooperativeMatrixElementType elemType, + CooperativeMatrixLayout layout) { Value *vec = convCoopMatrixVecToFlatVec(builder, matrix, elemType, layout); // This is a hacky workaround to the fact that for SPV_NV_cooperative_matrix, we have to support matrix length as // a specialization constant even though, at the time of specialization constant lowering, we don't yet know the // wave size. We should remove this once a healther KHR extension has been released. 
- if (layout == BuilderCommon::CooperativeMatrixLayout::AccumulatorMatrixLayout && + if (layout == CooperativeMatrixLayout::AccumulatorMatrixLayout && m_pipelineState->getShaderWaveSize(m_shaderStage) == 64) { unsigned length = cast(vec->getType())->getNumElements(); index = builder.CreateAnd(index, builder.getInt32(length - 1)); @@ -612,14 +616,14 @@ Value *LowerCooperativeMatrix::cooperativeMatrixExtract(BuilderCommon &builder, // @param elemType : the matrix element type // @param layout : the matrix layout type Value *LowerCooperativeMatrix::cooperativeMatrixInsert(BuilderCommon &builder, Value *matrix, Value *value, - Value *index, Builder::CooperativeMatrixElementType elemType, - Builder::CooperativeMatrixLayout layout) { + Value *index, CooperativeMatrixElementType elemType, + CooperativeMatrixLayout layout) { Value *vec = convCoopMatrixVecToFlatVec(builder, matrix, elemType, layout); // This is a hacky workaround to the fact that for SPV_NV_cooperative_matrix, we have to support matrix length as // a specialization constant even though, at the time of specialization constant lowering, we don't yet know the // wave size. We should remove this once a healther KHR extension has been released. 
- if (layout == BuilderCommon::CooperativeMatrixLayout::AccumulatorMatrixLayout && + if (layout == CooperativeMatrixLayout::AccumulatorMatrixLayout && m_pipelineState->getShaderWaveSize(m_shaderStage) == 64) { unsigned length = cast(vec->getType())->getNumElements(); Value *outOfBounds = builder.CreateICmpUGE(index, builder.getInt32(length)); @@ -641,8 +645,8 @@ Value *LowerCooperativeMatrix::cooperativeMatrixInsert(BuilderCommon &builder, V // @param elemType : the matrix element type // @param layout : the matrix layout type Value *LowerCooperativeMatrix::cooperativeMatrixFill(BuilderCommon &builder, Value *value, - Builder::CooperativeMatrixElementType elemType, - Builder::CooperativeMatrixLayout layout) { + CooperativeMatrixElementType elemType, + CooperativeMatrixLayout layout) { auto props = getTypeProperties(elemType, layout); Type *flatType = FixedVectorType::get(builder.transCooperativeMatrixElementType(elemType), props.numMatrixElements); @@ -663,8 +667,8 @@ Value *LowerCooperativeMatrix::cooperativeMatrixFill(BuilderCommon &builder, Val // @param instName : Name to give instruction(s). 
// @param insertPos : Where to insert the instruction Value *LowerCooperativeMatrix::cooperativeMatrixConvertInternal(CastInst::CastOps castOp, Value *source, - Builder::CooperativeMatrixElementType srcElemType, - Builder::CooperativeMatrixElementType dstElemType, + CooperativeMatrixElementType srcElemType, + CooperativeMatrixElementType dstElemType, const Twine &instName, Instruction *insertPos) { BuilderBase builder(*m_context); builder.SetInsertPoint(insertPos); @@ -672,8 +676,7 @@ Value *LowerCooperativeMatrix::cooperativeMatrixConvertInternal(CastInst::CastOp const unsigned vecSize = cast(source->getType())->getNumElements(); Type *dstType = FixedVectorType::get(builder.transCooperativeMatrixElementType(dstElemType), vecSize); - if ((srcElemType == Builder::CooperativeMatrixElementType::Float16 || - srcElemType == Builder::CooperativeMatrixElementType::Float32) && + if ((srcElemType == CooperativeMatrixElementType::Float16 || srcElemType == CooperativeMatrixElementType::Float32) && (castOp == Instruction::FPToUI || castOp == Instruction::FPToSI)) { // FIXME: fp16's range is covered by i32. So `fptoi half` can convert // to i32 first following a sext/zext to target integer type. @@ -702,11 +705,11 @@ Value *LowerCooperativeMatrix::cooperativeMatrixConvertInternal(CastInst::CastOp // @param instName : Name to give instruction(s). 
// @param insertPos : Where to insert the instruction Value *LowerCooperativeMatrix::cooperativeMatrixConvert(CastInst::CastOps castOp, Value *source, - Builder::CooperativeMatrixElementType srcElemType, - Builder::CooperativeMatrixElementType dstElemType, - Builder::CooperativeMatrixLayout srcLayout, - Builder::CooperativeMatrixLayout dstLayout, - const Twine &instName, Instruction *insertPos) { + CooperativeMatrixElementType srcElemType, + CooperativeMatrixElementType dstElemType, + CooperativeMatrixLayout srcLayout, + CooperativeMatrixLayout dstLayout, const Twine &instName, + Instruction *insertPos) { assert(source->getType()->isVectorTy()); BuilderBase builder(*m_context); builder.SetInsertPoint(insertPos); @@ -714,8 +717,8 @@ Value *LowerCooperativeMatrix::cooperativeMatrixConvert(CastInst::CastOps castOp Value *threadId = getLaneNumber(builder); if (castOp == 0) { // Only reshape on 16bits, not do convert - if ((srcLayout == Builder::CooperativeMatrixLayout::AccumulatorMatrixLayout) && - (dstLayout == Builder::CooperativeMatrixLayout::FactorMatrixLayout)) { + if ((srcLayout == CooperativeMatrixLayout::AccumulatorMatrixLayout) && + (dstLayout == CooperativeMatrixLayout::FactorMatrixLayout)) { // After mulAdd, the type for the matrix waiting to reshape is 8*float here const unsigned vecNums = cast(source->getType())->getNumElements(); source = builder.CreateBitCast(source, FixedVectorType::get(builder.getInt32Ty(), vecNums)); @@ -763,9 +766,9 @@ Value *LowerCooperativeMatrix::cooperativeMatrixConvert(CastInst::CastOps castOp // @param layout : Layout for the matrix. // @param instName : Name to give instruction(s). 
// @param insertPos : Where to insert the instruction -Value *LowerCooperativeMatrix::cooperativeMatrixBinaryOp(Builder::CooperativeMatrixArithOp coopMatArithOp, Value *lhs, - Value *rhs, Builder::CooperativeMatrixElementType elemType, - Builder::CooperativeMatrixLayout layout, const Twine &instName, +Value *LowerCooperativeMatrix::cooperativeMatrixBinaryOp(CooperativeMatrixArithOp coopMatArithOp, Value *lhs, + Value *rhs, CooperativeMatrixElementType elemType, + CooperativeMatrixLayout layout, const Twine &instName, Instruction *insertPos) { assert(lhs->getType()->isVectorTy() && lhs->getType() == rhs->getType() || rhs->getType()->isVectorTy()); Value *vcResult; @@ -775,31 +778,31 @@ Value *LowerCooperativeMatrix::cooperativeMatrixBinaryOp(Builder::CooperativeMat lhs = convCoopMatrixVecToFlatVec(builder, lhs, elemType, layout); rhs = convCoopMatrixVecToFlatVec(builder, rhs, elemType, layout); switch (coopMatArithOp) { - case Builder::CooperativeMatrixArithOp::IAdd: + case CooperativeMatrixArithOp::IAdd: vcResult = builder.CreateAdd(lhs, rhs); break; - case Builder::CooperativeMatrixArithOp::FAdd: + case CooperativeMatrixArithOp::FAdd: vcResult = builder.CreateFAdd(lhs, rhs); break; - case Builder::CooperativeMatrixArithOp::ISub: + case CooperativeMatrixArithOp::ISub: vcResult = builder.CreateSub(lhs, rhs); break; - case Builder::CooperativeMatrixArithOp::FSub: + case CooperativeMatrixArithOp::FSub: vcResult = builder.CreateFSub(lhs, rhs); break; - case Builder::CooperativeMatrixArithOp::IMul: + case CooperativeMatrixArithOp::IMul: vcResult = builder.CreateMul(lhs, rhs); break; - case Builder::CooperativeMatrixArithOp::FMul: + case CooperativeMatrixArithOp::FMul: vcResult = builder.CreateFMul(lhs, rhs); break; - case Builder::CooperativeMatrixArithOp::FDiv: + case CooperativeMatrixArithOp::FDiv: vcResult = builder.CreateFDiv(lhs, rhs); break; - case Builder::CooperativeMatrixArithOp::SDiv: + case CooperativeMatrixArithOp::SDiv: vcResult = builder.CreateSDiv(lhs, 
rhs); break; - case Builder::CooperativeMatrixArithOp::UDiv: + case CooperativeMatrixArithOp::UDiv: vcResult = builder.CreateUDiv(lhs, rhs); break; default: @@ -821,8 +824,8 @@ Value *LowerCooperativeMatrix::cooperativeMatrixBinaryOp(Builder::CooperativeMat // @param instName : Name to give instruction(s). // @param insertPos : Where to insert the instruction Value *LowerCooperativeMatrix::coopMatrixTimesScalar(Value *matrix, Value *scalar, - Builder::CooperativeMatrixElementType elemType, - Builder::CooperativeMatrixLayout layout, const Twine &instName, + CooperativeMatrixElementType elemType, + CooperativeMatrixLayout layout, const Twine &instName, Instruction *insertPos) { assert(matrix->getType()->getScalarType()->isIntegerTy() || matrix->getType()->getScalarType()->isFloatTy()); BuilderBase builder(*m_context); @@ -836,9 +839,8 @@ Value *LowerCooperativeMatrix::coopMatrixTimesScalar(Value *matrix, Value *scala auto splat = packedScalarVec ? builder.CreateShuffleVector(scalar, shuffleIndices) : builder.CreateVectorSplat(numElems, scalar); Value *vcFlatResult; - if ((elemType == Builder::CooperativeMatrixElementType::Float16) || - (elemType == Builder::CooperativeMatrixElementType::Float32) || - (elemType == Builder::CooperativeMatrixElementType::Float16Packed)) { + if ((elemType == CooperativeMatrixElementType::Float16) || (elemType == CooperativeMatrixElementType::Float32) || + (elemType == CooperativeMatrixElementType::Float16Packed)) { vcFlatResult = builder.CreateFMul(vcFlat, splat); } else { vcFlatResult = builder.CreateMul(vcFlat, splat); @@ -859,10 +861,9 @@ Value *LowerCooperativeMatrix::coopMatrixTimesScalar(Value *matrix, Value *scala // @param instName : Name to give instruction(s). 
// @param insertPos : Where to insert the instruction Value *LowerCooperativeMatrix::cooperativeMatrixReshape16BitElementGfx1011( - Value *source, Builder::CooperativeMatrixElementType srcElemType, Builder::CooperativeMatrixLayout srcLayout, - Builder::CooperativeMatrixLayout dstLayout, Value *threadId, const Twine &instName, Instruction *insertPos) { - assert(srcElemType == Builder::CooperativeMatrixElementType::Float16 || - srcElemType == Builder::CooperativeMatrixElementType::Int16); + Value *source, CooperativeMatrixElementType srcElemType, CooperativeMatrixLayout srcLayout, + CooperativeMatrixLayout dstLayout, Value *threadId, const Twine &instName, Instruction *insertPos) { + assert(srcElemType == CooperativeMatrixElementType::Float16 || srcElemType == CooperativeMatrixElementType::Int16); BuilderBase builder(*m_context); builder.SetInsertPoint(insertPos); Value *resultValue = nullptr; @@ -884,10 +885,10 @@ Value *LowerCooperativeMatrix::cooperativeMatrixReshape16BitElementGfx1011( return builder.CreateIntrinsic(int32Ty, Intrinsic::amdgcn_permlane64, {mappedArgs[0]}); }; - if (srcLayout == Builder::CooperativeMatrixLayout::FactorMatrixLayout) { // From A/B to C/D for 16bit element + if (srcLayout == CooperativeMatrixLayout::FactorMatrixLayout) { // From A/B to C/D for 16bit element Type *packedTy = - (srcElemType == Builder::CooperativeMatrixElementType::Float16) ? builder.getFloatTy() : builder.getInt32Ty(); - if (dstLayout == Builder::CooperativeMatrixLayout::AccumulatorMatrixLayout) { + (srcElemType == CooperativeMatrixElementType::Float16) ? 
builder.getFloatTy() : builder.getInt32Ty(); + if (dstLayout == CooperativeMatrixLayout::AccumulatorMatrixLayout) { unsigned vecSize = cast(source->getType())->getNumElements(); assert(vecSize == 8); // A/B should be 8*float16 or 8*int16 unsigned shiftVecNums = 8; @@ -927,15 +928,14 @@ Value *LowerCooperativeMatrix::cooperativeMatrixReshape16BitElementGfx1011( resultValue = builder.CreateLShr(resultValue, builder.CreateSelect(isEvenGroup, shiftZeorValue, shift16Value), instName); - if (srcElemType == Builder::CooperativeMatrixElementType::Float16) { + if (srcElemType == CooperativeMatrixElementType::Float16) { resultValue = builder.CreateBitCast(resultValue, FixedVectorType::get(builder.getFloatTy(), shiftVecNums), instName); // Bitcast to 8*bit32 for wave32 and 4*bit32 for wave64 resultValue = builder.CreateShuffleVector(resultValue, PoisonValue::get(resultValue->getType()), {0, 1, 2, 3, 4, 5, 6, 7}); } - } else if (dstLayout == - Builder::CooperativeMatrixLayout::Gfx10Accumulator16bitMatrixLayout) { // Emulation on NAVI2X - // from A/B to C/D on 16bit + } else if (dstLayout == CooperativeMatrixLayout::Gfx10Accumulator16bitMatrixLayout) { // Emulation on NAVI2X + // from A/B to C/D on 16bit resultValue = PoisonValue::get(FixedVectorType::get(packedTy, 8)); // Wave32/wave64 : lane0 : {1_0:0_0 3_0:2_0....15_0:14_0} lane16 : {1_0:0_0 3_0:2_0....15_0:14_0} // lane16 ~lane31 is redundant reshape to @@ -953,8 +953,8 @@ Value *LowerCooperativeMatrix::cooperativeMatrixReshape16BitElementGfx1011( // It's unnecessary for reshape after gfx11. 
resultValue = source; } - } else if (srcLayout == Builder::CooperativeMatrixLayout::AccumulatorMatrixLayout) { - if (dstLayout == Builder::CooperativeMatrixLayout::FactorMatrixLayout) { + } else if (srcLayout == CooperativeMatrixLayout::AccumulatorMatrixLayout) { + if (dstLayout == CooperativeMatrixLayout::FactorMatrixLayout) { // lane0----lan16----lane32-----lane48*/ // 1x-------1y-------1m---------1n*/ // ==> */ @@ -1004,17 +1004,17 @@ Value *LowerCooperativeMatrix::cooperativeMatrixReshape16BitElementGfx1011( matrix = builder.CreateShuffleVector(first, second, ArrayRef({0, 8, 1, 9, 2, 10, 3, 11}), instName); } // After shuffle wave64's layout is same with wave32 - if (srcElemType == Builder::CooperativeMatrixElementType::Float16) { + if (srcElemType == CooperativeMatrixElementType::Float16) { matrix = builder.CreateBitCast(matrix, FixedVectorType::get(builder.getFloatTy(), 8)); //->8*f32 } resultValue = matrix; } - } else if (srcLayout == Builder::CooperativeMatrixLayout::Gfx10Accumulator16bitMatrixLayout) { - if (dstLayout == Builder::CooperativeMatrixLayout::FactorMatrixLayout) { // NAVI2X:16bit reshape C/D->A/B + } else if (srcLayout == CooperativeMatrixLayout::Gfx10Accumulator16bitMatrixLayout) { + if (dstLayout == CooperativeMatrixLayout::FactorMatrixLayout) { // NAVI2X:16bit reshape C/D->A/B // C/D: LANE0: {1_0:0_0 5_0:4_0 9_0:8_0 13_0:12_0} LANE16:{3_0:2_0 7_0:6_0 11_0:10_0 15_0:14_0}===> // A/B: LANE0: {1_0:0_0 3_0:2_0 5_0:4:0....15_0:14_0} LANE16=LANE0 Type *packedTy = - (srcElemType == Builder::CooperativeMatrixElementType::Float16) ? builder.getFloatTy() : builder.getInt32Ty(); + (srcElemType == CooperativeMatrixElementType::Float16) ? 
builder.getFloatTy() : builder.getInt32Ty(); resultValue = PoisonValue::get(FixedVectorType::get(packedTy, 8)); unsigned LaneSelBits[2] = {0x76543210, 0xfedcba98}; Value *swapped = builder.CreateMapToSimpleType( @@ -1049,8 +1049,8 @@ Value *LowerCooperativeMatrix::cooperativeMatrixReshape16BitElementGfx1011( // @param instName : Name to give instruction(s). // @param insertPos : Where to insert the instruction Value *LowerCooperativeMatrix::cooperativeMatrixReshapeBetween8bitAnd32bitElementGfx1011( - Value *source, Builder::CooperativeMatrixElementType srcElemType, Builder::CooperativeMatrixLayout srcLayout, - const Twine &instName, Instruction *insertPos) { + Value *source, CooperativeMatrixElementType srcElemType, CooperativeMatrixLayout srcLayout, const Twine &instName, + Instruction *insertPos) { BuilderBase builder(*m_context); builder.SetInsertPoint(insertPos); @@ -1060,8 +1060,8 @@ Value *LowerCooperativeMatrix::cooperativeMatrixReshapeBetween8bitAnd32bitElemen Value *laneGroupIdx = builder.CreateUDiv(threadId, builder.getInt32(16)); Value *isEvenGroup = builder.CreateICmpEQ(builder.CreateAnd(laneGroupIdx, builder.getInt32(1)), builder.getInt32(0)); - if (srcLayout == Builder::CooperativeMatrixLayout::FactorMatrixLayout) { - assert(srcElemType == Builder::CooperativeMatrixElementType::Int8); + if (srcLayout == CooperativeMatrixLayout::FactorMatrixLayout) { + assert(srcElemType == CooperativeMatrixElementType::Int8); Value *int8Value = builder.CreateBitCast(source, FixedVectorType::get(builder.getInt8Ty(), 16)); if ((waveSize == 32) || (m_gfxIp.major < 11)) { Value *lowValue = builder.CreateShuffleVector(int8Value, ArrayRef({0, 2, 4, 6, 8, 10, 12, 14})); @@ -1086,11 +1086,10 @@ Value *LowerCooperativeMatrix::cooperativeMatrixReshapeBetween8bitAnd32bitElemen resultValue = builder.CreateSelect(isEvenGroupMoreThan32, highlowValue, resultValue, instName); resultValue = builder.CreateSelect(isOddGroupMoreThan32, highhighValue, resultValue, instName); } - } else if 
(srcLayout == Builder::CooperativeMatrixLayout::AccumulatorMatrixLayout || - srcLayout == Builder::CooperativeMatrixLayout::Gfx10AccumulatorMatrixLayout) { + } else if (srcLayout == CooperativeMatrixLayout::AccumulatorMatrixLayout || + srcLayout == CooperativeMatrixLayout::Gfx10AccumulatorMatrixLayout) { // - assert(srcElemType == Builder::CooperativeMatrixElementType::Int32 || - srcElemType == Builder::CooperativeMatrixElementType::Float32); + assert(srcElemType == CooperativeMatrixElementType::Int32 || srcElemType == CooperativeMatrixElementType::Float32); // unsigned vecSize = cast(source->getType())->getNumElements(); unsigned vecSize = 8; source = builder.CreateShuffleVector(source, PoisonValue::get(source->getType()), {0, 1, 2, 3, 4, 5, 6, 7}); @@ -1157,8 +1156,8 @@ Value *LowerCooperativeMatrix::cooperativeMatrixReshapeBetween8bitAnd32bitElemen // @param instName : Name to give instruction(s). // @param insertPos : Where to insert the instruction Value *LowerCooperativeMatrix::cooperativeMatrixReshapeBetween16bitAnd32bitOnAccGfx10( - Value *source, Builder::CooperativeMatrixElementType srcElemType, Builder::CooperativeMatrixElementType dstElemType, - Builder::CooperativeMatrixLayout layout, Value *isEvenGroup, const Twine &instName, Instruction *insertPos) { + Value *source, CooperativeMatrixElementType srcElemType, CooperativeMatrixElementType dstElemType, + CooperativeMatrixLayout layout, Value *isEvenGroup, const Twine &instName, Instruction *insertPos) { // 1. After convert from f32->f16: change the layout from 32bit layout to 16bit layout on Accumulator on gfx10. // 2. 
Before convert from f16->f32: change the layout from 16bit layout to 32bit layout on Accumulator on gfx10 @@ -1169,17 +1168,16 @@ Value *LowerCooperativeMatrix::cooperativeMatrixReshapeBetween16bitAnd32bitOnAcc // From the implementation side, it's same which only exchange off-diaglog element between {2_0:0_0} and {3_0:1_0}(1st // case) // or {1_0:0_0} and {3_0:2_0}(2nd case) - assert(layout == Builder::CooperativeMatrixLayout::Gfx10AccumulatorMatrixLayout || - layout == Builder::CooperativeMatrixLayout::Gfx10Accumulator16bitMatrixLayout); + assert(layout == CooperativeMatrixLayout::Gfx10AccumulatorMatrixLayout || + layout == CooperativeMatrixLayout::Gfx10Accumulator16bitMatrixLayout); BuilderBase builder(*m_context); builder.SetInsertPoint(insertPos); Value *resultValue = nullptr; - if (dstElemType == Builder::CooperativeMatrixElementType::Float16 || - dstElemType == Builder::CooperativeMatrixElementType::Int16) { + if (dstElemType == CooperativeMatrixElementType::Float16 || dstElemType == CooperativeMatrixElementType::Int16) { source = builder.CreateBitCast(source, FixedVectorType::get(builder.getInt32Ty(), 4)); - } else if (dstElemType == Builder::CooperativeMatrixElementType::Float32 || - dstElemType == Builder::CooperativeMatrixElementType::Int32) { + } else if (dstElemType == CooperativeMatrixElementType::Float32 || + dstElemType == CooperativeMatrixElementType::Int32) { source = builder.CreateBitCast(source, FixedVectorType::get(builder.getInt32Ty(), 8)); } unsigned LaneSelBits[2] = {0x76543210, 0xfedcba98}; @@ -1214,9 +1212,8 @@ Value *LowerCooperativeMatrix::cooperativeMatrixReshapeBetween16bitAnd32bitOnAcc isEvenGroup, builder.CreateAnd(builder.CreateShl(swapped, shiftValue), maskHighValue), maskedSourceHigh); resultValue = builder.CreateOr(highVal, lowVal); - if (srcElemType == Builder::CooperativeMatrixElementType::Float16 && - (dstElemType == Builder::CooperativeMatrixElementType::Float32 || - dstElemType == 
Builder::CooperativeMatrixElementType::Int32)) { + if (srcElemType == CooperativeMatrixElementType::Float16 && + (dstElemType == CooperativeMatrixElementType::Float32 || dstElemType == CooperativeMatrixElementType::Int32)) { resultValue = builder.CreateBitCast(resultValue, FixedVectorType::get(builder.getHalfTy(), 16)); // 2nd case:before convert } else { @@ -1238,10 +1235,10 @@ Value *LowerCooperativeMatrix::cooperativeMatrixReshapeBetween16bitAnd32bitOnAcc // @param instName : Name to give instruction(s). // @param insertPos : Where to insert the instruction Value *LowerCooperativeMatrix::cooperativeMatrixReshapeBeforeConvert(Value *source, - Builder::CooperativeMatrixElementType srcElemType, - Builder::CooperativeMatrixElementType dstElemType, - Builder::CooperativeMatrixLayout srcLayout, - Builder::CooperativeMatrixLayout dstLayout, + CooperativeMatrixElementType srcElemType, + CooperativeMatrixElementType dstElemType, + CooperativeMatrixLayout srcLayout, + CooperativeMatrixLayout dstLayout, const Twine &instName, Instruction *insertPos) { BuilderBase builder(*m_context); builder.SetInsertPoint(insertPos); @@ -1251,32 +1248,31 @@ Value *LowerCooperativeMatrix::cooperativeMatrixReshapeBeforeConvert(Value *sour Value *laneGroupIdx = builder.CreateUDiv(threadId, builder.getInt32(16)); Value *isEvenGroup = builder.CreateICmpEQ(builder.CreateAnd(laneGroupIdx, builder.getInt32(1)), builder.getInt32(0)); - if (srcElemType == Builder::CooperativeMatrixElementType::Float16 || - srcElemType == Builder::CooperativeMatrixElementType::Int16) { - if (srcLayout == Builder::CooperativeMatrixLayout::FactorMatrixLayout && - dstLayout == Builder::CooperativeMatrixLayout::AccumulatorMatrixLayout) { + if (srcElemType == CooperativeMatrixElementType::Float16 || srcElemType == CooperativeMatrixElementType::Int16) { + if (srcLayout == CooperativeMatrixLayout::FactorMatrixLayout && + dstLayout == CooperativeMatrixLayout::AccumulatorMatrixLayout) { resultValue = 
cooperativeMatrixReshape16BitElementGfx1011(source, srcElemType, srcLayout, dstLayout, threadId, "reshapeFactorToAcc", insertPos); resultValue = convCoopMatrixVecToFlatVec(builder, resultValue, srcElemType, dstLayout); - } else if (srcLayout == Builder::CooperativeMatrixLayout::FactorMatrixLayout && - dstLayout == Builder::CooperativeMatrixLayout::Gfx10AccumulatorMatrixLayout) { + } else if (srcLayout == CooperativeMatrixLayout::FactorMatrixLayout && + dstLayout == CooperativeMatrixLayout::Gfx10AccumulatorMatrixLayout) { resultValue = cooperativeMatrixReshape16BitElementGfx1011( - source, srcElemType, srcLayout, Builder::CooperativeMatrixLayout::Gfx10Accumulator16bitMatrixLayout, threadId, + source, srcElemType, srcLayout, CooperativeMatrixLayout::Gfx10Accumulator16bitMatrixLayout, threadId, "reshapeFactorToAcc", insertPos); resultValue = cooperativeMatrixReshapeBetween16bitAnd32bitOnAccGfx10( resultValue, srcElemType, dstElemType, dstLayout, isEvenGroup, "beforef16tof32", insertPos); resultValue = convCoopMatrixVecToFlatVec(builder, resultValue, srcElemType, dstLayout); - } else if (srcLayout == Builder::CooperativeMatrixLayout::Gfx10Accumulator16bitMatrixLayout && - dstLayout == Builder::CooperativeMatrixLayout::Gfx10AccumulatorMatrixLayout) { + } else if (srcLayout == CooperativeMatrixLayout::Gfx10Accumulator16bitMatrixLayout && + dstLayout == CooperativeMatrixLayout::Gfx10AccumulatorMatrixLayout) { resultValue = cooperativeMatrixReshapeBetween16bitAnd32bitOnAccGfx10(source, srcElemType, dstElemType, dstLayout, isEvenGroup, "beforef16tof32", insertPos); resultValue = convCoopMatrixVecToFlatVec(builder, resultValue, srcElemType, dstLayout); } else { llvm_unreachable("Unsupported layout!"); } - } else if (srcElemType == Builder::CooperativeMatrixElementType::Int8) { + } else if (srcElemType == CooperativeMatrixElementType::Int8) { // 8bit already return the N*flatType, it's unnecessary to call convCoopMatrixVecToFlatVec - if (srcLayout == 
Builder::CooperativeMatrixLayout::FactorMatrixLayout) { + if (srcLayout == CooperativeMatrixLayout::FactorMatrixLayout) { resultValue = cooperativeMatrixReshapeBetween8bitAnd32bitElementGfx1011(source, srcElemType, srcLayout, "reshapeFactorToAcc", insertPos); } else { @@ -1302,10 +1298,10 @@ Value *LowerCooperativeMatrix::cooperativeMatrixReshapeBeforeConvert(Value *sour // @param instName : Name to give instruction(s). // @param insertPos : Where to insert the instruction Value *LowerCooperativeMatrix::cooperativeMatrixReshapeAfterConvert(Value *source, - Builder::CooperativeMatrixElementType srcElemType, - Builder::CooperativeMatrixElementType dstElemType, - Builder::CooperativeMatrixLayout srcLayout, - Builder::CooperativeMatrixLayout dstLayout, + CooperativeMatrixElementType srcElemType, + CooperativeMatrixElementType dstElemType, + CooperativeMatrixLayout srcLayout, + CooperativeMatrixLayout dstLayout, const Twine &instName, Instruction *insertPos) { BuilderBase builder(*m_context); builder.SetInsertPoint(insertPos); @@ -1315,10 +1311,9 @@ Value *LowerCooperativeMatrix::cooperativeMatrixReshapeAfterConvert(Value *sourc Value *laneGroupIdx = builder.CreateUDiv(threadId, builder.getInt32(16)); Value *isEvenGroup = builder.CreateICmpEQ(builder.CreateAnd(laneGroupIdx, builder.getInt32(1)), builder.getInt32(0)); - if (dstElemType == Builder::CooperativeMatrixElementType::Float16 || - dstElemType == Builder::CooperativeMatrixElementType::Int16) { - if (srcLayout == Builder::CooperativeMatrixLayout::AccumulatorMatrixLayout && - dstLayout == Builder::CooperativeMatrixLayout::FactorMatrixLayout) { + if (dstElemType == CooperativeMatrixElementType::Float16 || dstElemType == CooperativeMatrixElementType::Int16) { + if (srcLayout == CooperativeMatrixLayout::AccumulatorMatrixLayout && + dstLayout == CooperativeMatrixLayout::FactorMatrixLayout) { // It needs to convert 16bit*8 into 32bit*8(high 16bit will be unused) as // the input for reshape interface will be 32bit*8 
keeping compatibility for reshape+muladd+reshape case. resultValue = @@ -1327,22 +1322,22 @@ Value *LowerCooperativeMatrix::cooperativeMatrixReshapeAfterConvert(Value *sourc resultValue = builder.CreateZExt(resultValue, FixedVectorType::get(builder.getInt32Ty(), 8), "zext"); resultValue = cooperativeMatrixReshape16BitElementGfx1011(resultValue, dstElemType, srcLayout, dstLayout, threadId, "reshapeAccToFactor", insertPos); - } else if (srcLayout == Builder::CooperativeMatrixLayout::Gfx10AccumulatorMatrixLayout && - dstLayout == Builder::CooperativeMatrixLayout::FactorMatrixLayout) { + } else if (srcLayout == CooperativeMatrixLayout::Gfx10AccumulatorMatrixLayout && + dstLayout == CooperativeMatrixLayout::FactorMatrixLayout) { resultValue = cooperativeMatrixReshapeBetween16bitAnd32bitOnAccGfx10(source, srcElemType, dstElemType, srcLayout, isEvenGroup, "afterf32tof16", insertPos); resultValue = cooperativeMatrixReshape16BitElementGfx1011( - resultValue, dstElemType, Builder::CooperativeMatrixLayout::Gfx10Accumulator16bitMatrixLayout, dstLayout, - threadId, "reshapeAccToFactor", insertPos); - } else if (srcLayout == Builder::CooperativeMatrixLayout::Gfx10AccumulatorMatrixLayout && - dstLayout == Builder::CooperativeMatrixLayout::Gfx10Accumulator16bitMatrixLayout) { + resultValue, dstElemType, CooperativeMatrixLayout::Gfx10Accumulator16bitMatrixLayout, dstLayout, threadId, + "reshapeAccToFactor", insertPos); + } else if (srcLayout == CooperativeMatrixLayout::Gfx10AccumulatorMatrixLayout && + dstLayout == CooperativeMatrixLayout::Gfx10Accumulator16bitMatrixLayout) { resultValue = cooperativeMatrixReshapeBetween16bitAnd32bitOnAccGfx10(source, srcElemType, dstElemType, srcLayout, isEvenGroup, "afterf32tof16", insertPos); } else { llvm_unreachable("Unsupported elemtype!"); } - } else if (dstElemType == Builder::CooperativeMatrixElementType::Int8) { - if (dstLayout == Builder::CooperativeMatrixLayout::FactorMatrixLayout) { // gfx10/gfx11: 32bit->8bit + } else if (dstElemType 
== CooperativeMatrixElementType::Int8) { + if (dstLayout == CooperativeMatrixLayout::FactorMatrixLayout) { // gfx10/gfx11: 32bit->8bit resultValue = cooperativeMatrixReshapeBetween8bitAnd32bitElementGfx1011(source, srcElemType, srcLayout, "reshapeFactorToAcc", insertPos); } else { @@ -1362,10 +1357,9 @@ Value *LowerCooperativeMatrix::cooperativeMatrixReshapeAfterConvert(Value *sourc // @param srcLayout: Identify whether it's A/B or C/D // @param instName : Name to give instruction(s). // @param insertPos : Where to insert the instruction -Value *LowerCooperativeMatrix::cooperativeMatrixTranspose(llvm::Value *matrix, - Builder::CooperativeMatrixElementType elemType, - Builder::CooperativeMatrixLayout srcLayout, - const Twine &instName, llvm::Instruction *insertPos) { +Value *LowerCooperativeMatrix::cooperativeMatrixTranspose(llvm::Value *matrix, CooperativeMatrixElementType elemType, + CooperativeMatrixLayout srcLayout, const Twine &instName, + llvm::Instruction *insertPos) { BuilderBase builder(*m_context); builder.SetInsertPoint(insertPos); @@ -1390,7 +1384,7 @@ Value *LowerCooperativeMatrix::cooperativeMatrixTranspose(llvm::Value *matrix, Value *dpp8 = builder.getInt32(1 | 0 << 3 | 3 << 6 | 2 << 9 | 5 << 12 | 4 << 15 | 7 << 18 | 6 << 21); Value *matrixShuffle = builder.CreateMapToSimpleType(mapFuncDpp8, matrix, {dpp8}); - if (elemType == Builder::CooperativeMatrixElementType::Int8) { + if (elemType == CooperativeMatrixElementType::Int8) { // 1st step: {3_0:2_0:1_0:0_0} {3_1:2_1:1_1:0_1} -> // {0_1:0_0:2_1:2_0} {1_1:1_0:3_1:3_0} @@ -1417,8 +1411,7 @@ Value *LowerCooperativeMatrix::cooperativeMatrixTranspose(llvm::Value *matrix, vecStride = 1; laneStride = 4; - } else if (elemType == Builder::CooperativeMatrixElementType::Int16 || - elemType == Builder::CooperativeMatrixElementType::Float16) { + } else if (elemType == CooperativeMatrixElementType::Int16 || elemType == CooperativeMatrixElementType::Float16) { // lane0:{1_0, 0_0} lane1:{1_1,0_1} -> lane0: {0_1, 0_0} 
lane1:{1_1, 1_0} matrix = builder.CreateBitCast(matrix, FixedVectorType::get(builder.getInt32Ty(), vecSize)); matrixShuffle = builder.CreateBitCast(matrixShuffle, FixedVectorType::get(builder.getInt32Ty(), vecSize)); @@ -1431,7 +1424,7 @@ Value *LowerCooperativeMatrix::cooperativeMatrixTranspose(llvm::Value *matrix, Value *maskedMatrixLow = builder.CreateAnd(matrix, lowmaskValue); Value *low = builder.CreateSelect(isEvenThread, maskedMatrixLow, builder.CreateLShr(matrixShuffle, shiftValue)); matrix = builder.CreateOr(high, low); - if (elemType == Builder::CooperativeMatrixElementType::Float16) { + if (elemType == CooperativeMatrixElementType::Float16) { matrix = builder.CreateBitCast(matrix, FixedVectorType::get(builder.getFloatTy(), vecSize)); } vecStride = 1; @@ -1535,8 +1528,8 @@ Value *LowerCooperativeMatrix::transposeCooperativeMatrixRecursively(llvm::Value // @param insertPos : Where to insert the instruction Value *LowerCooperativeMatrix::cooperativeMatrixMulAdd(llvm::Value *matrixA, llvm::Value *matrixB, llvm::Value *matrixC, bool isSignedA, bool isSignedB, bool isSatOrOpsel, bool isTied, - Builder::CooperativeMatrixElementType accumElemType, - Builder::CooperativeMatrixElementType factorElemType, + CooperativeMatrixElementType accumElemType, + CooperativeMatrixElementType factorElemType, const Twine &instName, Instruction *insertPos) { BuilderBase builder(*m_context); builder.SetInsertPoint(insertPos); @@ -1562,25 +1555,25 @@ Value *LowerCooperativeMatrix::cooperativeMatrixMulAdd(llvm::Value *matrixA, llv Value *matrixD; unsigned waveSize = m_pipelineState->getShaderWaveSize(m_shaderStage); - if (factorElemType == Builder::CooperativeMatrixElementType::Float16 || - factorElemType == Builder::CooperativeMatrixElementType::Int16) { + if (factorElemType == CooperativeMatrixElementType::Float16 || + factorElemType == CooperativeMatrixElementType::Int16) { unsigned factorFlatElemNum = 0; { factorFlatElemNum = 16; } Type *factorType = 
FixedVectorType::get(builder.transCooperativeMatrixElementType(factorElemType), factorFlatElemNum); matrixA = builder.CreateBitCast(matrixA, factorType); matrixB = builder.CreateBitCast(matrixB, factorType); - } else if (factorElemType == Builder::CooperativeMatrixElementType::Int8) { + } else if (factorElemType == CooperativeMatrixElementType::Int8) { } else { llvm_unreachable("Factor element type is not supported!"); } - if (accumElemType == Builder::CooperativeMatrixElementType::Float32 || - accumElemType == Builder::CooperativeMatrixElementType::Int32) { + if (accumElemType == CooperativeMatrixElementType::Float32 || + accumElemType == CooperativeMatrixElementType::Int32) { matrixC = waveSize == 64 ? builder.CreateShuffleVector(matrixC, ArrayRef({0, 1, 2, 3}), "shuffleVector") : matrixC; - } else if (accumElemType == Builder::CooperativeMatrixElementType::Float16 || - accumElemType == Builder::CooperativeMatrixElementType::Int16) { + } else if (accumElemType == CooperativeMatrixElementType::Float16 || + accumElemType == CooperativeMatrixElementType::Int16) { { matrixC = waveSize == 64 ? 
builder.CreateShuffleVector(matrixC, ArrayRef({0, 1, 2, 3}), "shuffleVector") : matrixC; @@ -1592,20 +1585,20 @@ Value *LowerCooperativeMatrix::cooperativeMatrixMulAdd(llvm::Value *matrixA, llv llvm_unreachable("Accumulator element type is not supported!"); } - if (factorElemType == Builder::CooperativeMatrixElementType::Float16 && - accumElemType == Builder::CooperativeMatrixElementType::Float32) { + if (factorElemType == CooperativeMatrixElementType::Float16 && + accumElemType == CooperativeMatrixElementType::Float32) { matrixD = builder.CreateIntrinsic(matrixC->getType(), Intrinsic::amdgcn_wmma_f32_16x16x16_f16, {matrixA, matrixB, matrixC}, nullptr, instName); - } else if (factorElemType == Builder::CooperativeMatrixElementType::Int8 && - accumElemType == Builder::CooperativeMatrixElementType::Int32) { + } else if (factorElemType == CooperativeMatrixElementType::Int8 && + accumElemType == CooperativeMatrixElementType::Int32) { matrixD = builder.CreateIntrinsic(matrixC->getType(), Intrinsic::amdgcn_wmma_i32_16x16x16_iu8, {builder.getInt1(isSignedA), matrixA, builder.getInt1(isSignedB), matrixB, matrixC, builder.getInt1(isSatOrOpsel)}, nullptr, instName); - } else if (factorElemType == Builder::CooperativeMatrixElementType::Float16 && - accumElemType == Builder::CooperativeMatrixElementType::Float16) { + } else if (factorElemType == CooperativeMatrixElementType::Float16 && + accumElemType == CooperativeMatrixElementType::Float16) { // Matrix convert to match intrinsic arguments: Wave32: float32*v8->half*v16 // Wave64: float32*v4->half*v8 auto intrinsic = Intrinsic::amdgcn_wmma_f16_16x16x16_f16; @@ -1621,8 +1614,8 @@ Value *LowerCooperativeMatrix::cooperativeMatrixMulAdd(llvm::Value *matrixA, llv llvm_unreachable("The accumulator type is not supported."); } - if (accumElemType == Builder::CooperativeMatrixElementType::Float16 || - accumElemType == Builder::CooperativeMatrixElementType::Int16) { + if (accumElemType == CooperativeMatrixElementType::Float16 || + 
accumElemType == CooperativeMatrixElementType::Int16) { unsigned coopVeclength = cast(matrixD->getType())->getNumElements(); Type *wordTy = builder.transCooperativeMatrixElementType(accumElemType)->isIntOrIntVectorTy() ? builder.getInt32Ty() @@ -1641,8 +1634,8 @@ Value *LowerCooperativeMatrix::cooperativeMatrixMulAdd(llvm::Value *matrixA, llv return matrixD; } else { // Emulator on NAVI2X - Type *packedTy = (factorElemType == Builder::CooperativeMatrixElementType::Float16) ? builder.getFloatTy() - : builder.getInt32Ty(); + Type *packedTy = + (factorElemType == CooperativeMatrixElementType::Float16) ? builder.getFloatTy() : builder.getInt32Ty(); Value *dotProductValue; Value *threadId = getLaneNumber(builder); @@ -1659,8 +1652,8 @@ Value *LowerCooperativeMatrix::cooperativeMatrixMulAdd(llvm::Value *matrixA, llv }; // matrixC is not reshaped for gfx10 - if (accumElemType == Builder::CooperativeMatrixElementType::Float32 || - accumElemType == Builder::CooperativeMatrixElementType::Int32) { + if (accumElemType == CooperativeMatrixElementType::Float32 || + accumElemType == CooperativeMatrixElementType::Int32) { dotProductValue = PoisonValue::get(FixedVectorType::get(packedTy, 8)); for (unsigned idxc = 0; idxc < 8; ++idxc) { Value *rowlowgroup = builder.CreateMapToSimpleType(mapFuncReadLane, matrixA, builder.getInt32(idxc * 2)); @@ -1668,12 +1661,12 @@ Value *LowerCooperativeMatrix::cooperativeMatrixMulAdd(llvm::Value *matrixA, llv Value *rowData = builder.CreateSelect(isEvenGroup, rowlowgroup, rowhighgroup); Value *mulAB; Value *initAccumulator = builder.CreateExtractElement(matrixC, idxc); - if (factorElemType == Builder::CooperativeMatrixElementType::Float16) { + if (factorElemType == CooperativeMatrixElementType::Float16) { mulAB = createDotProductFp16Fp32(rowData, matrixB, initAccumulator, isSatOrOpsel, instName, insertPos); - } else if (factorElemType == Builder::CooperativeMatrixElementType::Int16) { + } else if (factorElemType == 
CooperativeMatrixElementType::Int16) { mulAB = createDotProductInt16Int32(rowData, matrixB, initAccumulator, flags, isSatOrOpsel, instName, insertPos); - } else if (factorElemType == Builder::CooperativeMatrixElementType::Int8) { + } else if (factorElemType == CooperativeMatrixElementType::Int8) { mulAB = createDotProductInt8Int32(rowData, matrixB, initAccumulator, flags, isSatOrOpsel, instName, insertPos); } else { @@ -1681,15 +1674,15 @@ Value *LowerCooperativeMatrix::cooperativeMatrixMulAdd(llvm::Value *matrixA, llv } dotProductValue = builder.CreateInsertElement(dotProductValue, mulAB, idxc); } - } else if (accumElemType == Builder::CooperativeMatrixElementType::Int16 || - accumElemType == Builder::CooperativeMatrixElementType::Float16) { + } else if (accumElemType == CooperativeMatrixElementType::Int16 || + accumElemType == CooperativeMatrixElementType::Float16) { dotProductValue = PoisonValue::get(FixedVectorType::get(builder.transCooperativeMatrixElementType(accumElemType), 8)); // For gfx10, A*B:8*float32->16*half C: no reshape for 16bit, still 16*half - Value *colData = convCoopMatrixVecToFlatVec(builder, matrixB, factorElemType, - Builder::CooperativeMatrixLayout::FactorMatrixLayout); + Value *colData = + convCoopMatrixVecToFlatVec(builder, matrixB, factorElemType, CooperativeMatrixLayout::FactorMatrixLayout); matrixC = convCoopMatrixVecToFlatVec(builder, matrixC, accumElemType, - Builder::CooperativeMatrixLayout::Gfx10Accumulator16bitMatrixLayout); + CooperativeMatrixLayout::Gfx10Accumulator16bitMatrixLayout); for (unsigned idxc = 0, accIdx = 0; idxc < 16; idxc += 4, accIdx += 2) { Value *rowData1Low = builder.CreateMapToSimpleType(mapFuncReadLane, matrixA, builder.getInt32(idxc)); @@ -1700,17 +1693,17 @@ Value *LowerCooperativeMatrix::cooperativeMatrixMulAdd(llvm::Value *matrixA, llv Value *rowData1 = builder.CreateSelect(isEvenGroup, rowData1Low, rowData1High); Value *rowData2 = builder.CreateSelect(isEvenGroup, rowData2Low, rowData2High); - rowData1 = 
convCoopMatrixVecToFlatVec(builder, rowData1, factorElemType, - Builder::CooperativeMatrixLayout::FactorMatrixLayout); - rowData2 = convCoopMatrixVecToFlatVec(builder, rowData2, factorElemType, - Builder::CooperativeMatrixLayout::FactorMatrixLayout); + rowData1 = + convCoopMatrixVecToFlatVec(builder, rowData1, factorElemType, CooperativeMatrixLayout::FactorMatrixLayout); + rowData2 = + convCoopMatrixVecToFlatVec(builder, rowData2, factorElemType, CooperativeMatrixLayout::FactorMatrixLayout); Value *mulAB1; Value *mulAB2; Value *accumulator1 = builder.CreateExtractElement(matrixC, accIdx); Value *accumulator2 = builder.CreateExtractElement(matrixC, accIdx + 1); - if (accumElemType == Builder::CooperativeMatrixElementType::Float16) { + if (accumElemType == CooperativeMatrixElementType::Float16) { mulAB1 = createDotProductFp16Fp16(rowData1, colData, accumulator1, isSatOrOpsel, instName, insertPos); mulAB2 = createDotProductFp16Fp16(rowData2, colData, accumulator2, isSatOrOpsel, instName, insertPos); } else { @@ -1724,7 +1717,7 @@ Value *LowerCooperativeMatrix::cooperativeMatrixMulAdd(llvm::Value *matrixA, llv } dotProductValue = convFlatVecToCoopMatrixVec(builder, dotProductValue, accumElemType, - Builder::CooperativeMatrixLayout::Gfx10Accumulator16bitMatrixLayout); + CooperativeMatrixLayout::Gfx10Accumulator16bitMatrixLayout); } else { llvm_unreachable("The accumulator type is not supported."); } @@ -2005,7 +1998,7 @@ void LowerCooperativeMatrix::visitCooperativeRowAccLoadOp(CooperativeRowAccLoadO auto dataPtr = load.getPointer(); auto stride = load.getStride(); - auto elemType = static_cast(load.getElemType()); + auto elemType = load.getElemType(); auto memoryAccess = load.getMemoryAccess(); assert(builder.transCooperativeMatrixElementType(elemType) == load.getType()); @@ -2019,9 +2012,9 @@ void LowerCooperativeMatrix::visitCooperativeRowAccLoadOp(CooperativeRowAccLoadO stride = builder.CreateExactSDiv(stride, builder.getInt32(dataBitwidth / 8)); // calc 
memoryAccess - bool isVolatile = memoryAccess & Builder::MemoryAccessVolatileMask; - bool isCoherent = memoryAccess & Builder::MemoryAccessCoherentMask; - bool isTemporal = memoryAccess & Builder::MemoryAccessTemporalMask; + bool isVolatile = (unsigned)(memoryAccess) & (unsigned)(CooperativeMatrixMemoryAccess::MemoryAccessVolatileMask); + bool isCoherent = (unsigned)(memoryAccess) & (unsigned)(CooperativeMatrixMemoryAccess::MemoryAccessCoherentMask); + bool isTemporal = (unsigned)(memoryAccess) & (unsigned)(CooperativeMatrixMemoryAccess::MemoryAccessTemporalMask); Value *threadId = getLaneNumber(builder); Value *colOffsetPerLane = builder.CreateSRem(threadId, builder.getInt32(16)); @@ -2048,11 +2041,11 @@ void LowerCooperativeMatrix::visitCooperativeRowAccStoreOp(CooperativeRowAccStor auto dataPtr = store.getPointer(); auto stride = store.getStride(); - auto elemType = static_cast(store.getElemType()); + auto elemType = store.getElemType(); auto memoryAccess = store.getMemoryAccess(); - auto val = store.getValue(); + auto data = store.getData(); - assert(builder.transCooperativeMatrixElementType(elemType) == val->getType()); + assert(builder.transCooperativeMatrixElementType(elemType) == data->getType()); // Calc element offset in memory Type *elemTy = builder.transCooperativeMatrixElementType(elemType); @@ -2063,16 +2056,16 @@ void LowerCooperativeMatrix::visitCooperativeRowAccStoreOp(CooperativeRowAccStor stride = builder.CreateExactSDiv(stride, builder.getInt32(dataBitwidth / 8)); // calc memoryAccess - bool isVolatile = memoryAccess & Builder::MemoryAccessVolatileMask; - bool isCoherent = memoryAccess & Builder::MemoryAccessCoherentMask; - bool isTemporal = memoryAccess & Builder::MemoryAccessTemporalMask; + bool isVolatile = (unsigned)(memoryAccess) & (unsigned)(CooperativeMatrixMemoryAccess::MemoryAccessVolatileMask); + bool isCoherent = (unsigned)(memoryAccess) & (unsigned)(CooperativeMatrixMemoryAccess::MemoryAccessCoherentMask); + bool isTemporal = 
(unsigned)(memoryAccess) & (unsigned)(CooperativeMatrixMemoryAccess::MemoryAccessTemporalMask); Value *threadId = getLaneNumber(builder); Value *colOffsetPerLane = builder.CreateSRem(threadId, builder.getInt32(16)); Value *offset = builder.CreateMul(colOffsetPerLane, stride); Value *elemPtr = builder.CreateGEP(elemTy, dataPtr, offset); - Value *elemVal = builder.CreateStore(val, elemPtr, isVolatile); + Value *elemVal = builder.CreateStore(data, elemPtr, isVolatile); if (isCoherent && !(addrSpace == ADDR_SPACE_LOCAL && dataBitwidth < 32)) cast(elemVal)->setAtomic(AtomicOrdering::Unordered); if (isTemporal) @@ -2089,17 +2082,16 @@ void LowerCooperativeMatrix::visitCooperativeRowAccAccumulateModeOp(CooperativeR BuilderBase builder(*m_context); builder.SetInsertPoint(&accumulateMode); - Value *rowAccValue = accumulateMode.getRowAccValue(); - auto elemType = static_cast(accumulateMode.getElemType()); + Value *rowAccVal = accumulateMode.getRowAcc(); + auto elemType = accumulateMode.getElemType(); assert(builder.transCooperativeMatrixElementType(elemType) == accumulateMode.getType()); - assert(accumulateMode.getType() == rowAccValue->getType()); + assert(accumulateMode.getType() == rowAccVal->getType()); - if (m_gfxIp.major >= 12) { - rowAccValue = cooperativeRowAccConvertToAccumulateMode(builder, getLaneNumber(builder), rowAccValue, elemType); - } + if (m_gfxIp.major >= 12) + rowAccVal = cooperativeRowAccConvertToAccumulateMode(builder, getLaneNumber(builder), rowAccVal, elemType); - accumulateMode.replaceAllUsesWith(rowAccValue); + accumulateMode.replaceAllUsesWith(rowAccVal); m_coopRowAccCalls.push_back(&accumulateMode); } @@ -2111,19 +2103,340 @@ void LowerCooperativeMatrix::visitCooperativeRowAccFinalizeModeOp(CooperativeRow BuilderBase builder(*m_context); builder.SetInsertPoint(&finalize); - Value *rowAccValue = finalize.getRowAccValue(); - auto elemType = static_cast(finalize.getElemType()); + Value *rowAccVal = finalize.getRowAcc(); + auto elemType = 
finalize.getElemType(); assert(builder.transCooperativeMatrixElementType(elemType) == finalize.getType()); - assert(finalize.getType() == rowAccValue->getType()); + assert(finalize.getType() == rowAccVal->getType()); if (m_gfxIp.major >= 12) - rowAccValue = cooperativeRowAccConvertToFinalizeMode(builder, rowAccValue, elemType); + rowAccVal = cooperativeRowAccConvertToFinalizeMode(builder, rowAccVal, elemType); - finalize.replaceAllUsesWith(rowAccValue); + finalize.replaceAllUsesWith(rowAccVal); m_coopRowAccCalls.push_back(&finalize); } +// ===================================================================================================================== +// Visit "CooperativeRowAccSplatOp" instruction +// +// @param inst : The dialect instruction to process +void LowerCooperativeMatrix::visitCooperativeRowAccSplatOp(CooperativeRowAccSplatOp &splat) { + BuilderBase builder(*m_context); + builder.SetInsertPoint(&splat); + + Value *scalar = splat.getScalar(); + + assert(builder.transCooperativeMatrixElementType(splat.getElemType()) == scalar->getType()); + + splat.replaceAllUsesWith(scalar); + m_coopRowAccCalls.push_back(&splat); +} + +// ===================================================================================================================== +// Visit "CooperativeRowAccExpandOp" instruction +// +// @param inst : The dialect instruction to process +void LowerCooperativeMatrix::visitCooperativeRowAccExpandOp(CooperativeRowAccExpandOp &expand) { + BuilderBase builder(*m_context); + builder.SetInsertPoint(&expand); + + auto rowAccVal = expand.getRowAcc(); + auto rowAccElemType = expand.getRowAccElemType(); + auto matrixElemType = expand.getMatrixElemType(); + auto matrixLayout = expand.getMatrixLayout(); + auto colMajor = expand.getColMajor(); + + assert(builder.getCooperativeMatrixTy(matrixElemType, matrixLayout) == expand.getType()); + assert(rowAccElemType == CooperativeMatrixElementType::Float16 || + rowAccElemType == 
CooperativeMatrixElementType::Float32 || + rowAccElemType == CooperativeMatrixElementType::Int32); + assert(matrixElemType == CooperativeMatrixElementType::Float16 || + matrixElemType == CooperativeMatrixElementType::Float32 || + matrixElemType == CooperativeMatrixElementType::Int32); + + // Element type convert. + if (rowAccElemType == CooperativeMatrixElementType::Float16 && + matrixElemType == CooperativeMatrixElementType::Float32) + rowAccVal = builder.CreateFPExt(rowAccVal, builder.getFloatTy()); + else if (rowAccElemType == CooperativeMatrixElementType::Float32 && + matrixElemType == CooperativeMatrixElementType::Float16) + rowAccVal = builder.CreateFPTrunc(rowAccVal, builder.getHalfTy()); + else + assert(rowAccElemType == matrixElemType); + + assert(matrixLayout == CooperativeMatrixLayout::AccumulatorMatrixLayout); + auto props = getTypeProperties(matrixElemType, matrixLayout); + Type *flatType = + FixedVectorType::get(builder.transCooperativeMatrixElementType(matrixElemType), props.numFlatElements); + Value *flatVec = PoisonValue::get(flatType); + + if (!colMajor) { + for (unsigned idx = 0; idx < props.numFlatElements; idx++) + flatVec = builder.CreateInsertElement(flatVec, rowAccVal, idx); + } else { + auto mapFuncDpp = [](BuilderBase &builder, ArrayRef mappedArgs, + ArrayRef passthroughArgs) -> Value * { + return builder.CreateIntrinsic( + Intrinsic::amdgcn_mov_dpp, builder.getInt32Ty(), + {mappedArgs[0], passthroughArgs[0], passthroughArgs[1], passthroughArgs[2], passthroughArgs[3]}); + }; + + auto waveSize = m_pipelineState->getShaderWaveSize(m_shaderStage); + assert(waveSize == 32 || waveSize == 64); + + DppCtrl shuffleCtrl[4] = {DppCtrl(UINT32_MAX), DppCtrl(UINT32_MAX), DppCtrl(UINT32_MAX), DppCtrl(UINT32_MAX)}; + DppCtrl expandCtrl[8] = {DppCtrl(UINT32_MAX), DppCtrl(UINT32_MAX), DppCtrl(UINT32_MAX), DppCtrl(UINT32_MAX), + DppCtrl(UINT32_MAX), DppCtrl(UINT32_MAX), DppCtrl(UINT32_MAX), DppCtrl(UINT32_MAX)}; + + if (matrixLayout == 
CooperativeMatrixLayout::AccumulatorMatrixLayout) { + if (waveSize == 64) { + // Gfx11 AccumulatorMatrixLayout F32/I32@Wave64: + // VGPR/Lane . 0 . . . . 1 . . . . 15 . . . . 16 . . . . 31 + // VGPR[8]: C0_0 . . . C0_1 . . . C0_f . . . C1_0 . . . C1_f + // VGPR[9]: C4_0 . . . C4_1 . . . C4_f . . . C5_0 . . . C5_f + // VGPR[10]: C8_0 . . . C8_1 . . . C8_f . . . C9_0 . . . C9_f + // VGPR[11]: Cc_0 . . . Cc_1 . . . Cc_f . . . Cd_0 . . . Cd_f + // VGPR/Lane . 32 . . . 33 . . . . 47 . . . . 48 . . . . 63 + // VGPR[8]: C2_0 . . . C2_1 . . . C2_f . . . C3_0 . . . C3_f + // VGPR[9]: C6_0 . . . C6_1 . . . C6_f . . . C7_0 . . . C7_f + // VGPR[10]: Ca_0 . . . Ca_1 . . . Ca_f . . . Cb_0 . . . Cb_f + // VGPR[11]: Ce_0 . . . Ce_1 . . . Ce_f . . . Cf_0 . . . Cf_f + // Row accumulator data is in finalized state and duplicated in each 16 lanes. + // Change row accumulator data lanes: + // 16 - 31 to [C1, C2, C3, C4, C5, C6, C7, C8, C9, Ca, Cb, Cc, Cd, Ce, Cf, XX]. + // 32 - 47 to [C2, C3, C4, C5, C6, C7, C8, C9, Ca, Cb, Cc, Cd, Ce, Cf, XX, XX]. + // 48 - 63 to [C3, C4, C5, C6, C7, C8, C9, Ca, Cb, Cc, Cd, Ce, Cf, XX, XX, XX]. + shuffleCtrl[1] = DppCtrl::DppRowSl1; + shuffleCtrl[2] = DppCtrl::DppRowSl2; + shuffleCtrl[3] = DppCtrl::DppRowSl3; + expandCtrl[0] = DppCtrl::DppRowShare0; + expandCtrl[1] = DppCtrl::DppRowShare4; + expandCtrl[2] = DppCtrl::DppRowShare8; + expandCtrl[3] = DppCtrl::DppRowShare12; + } else { + // Gfx11 AccumulatorMatrixLayout F32/I32@Wave32: + // VGPR/Lane . 0 . . . . 1 . . . . 15 . . . . 16 . . . . 31 + // VGPR[8]: C0_0 . . . C0_1 . . . C0_f . . . C1_0 . . . C1_f + // VGPR[9]: C2_0 . . . C2_1 . . . C2_f . . . C3_0 . . . C3_f + // VGPR[10]: C4_0 . . . C4_1 . . . C4_f . . . C5_0 . . . C5_f + // VGPR[11]: C6_0 . . . C6_1 . . . C6_f . . . C7_0 . . . C7_f + // VGPR[12]: C8_0 . . . C8_1 . . . C8_f . . . C9_0 . . . C9_f + // VGPR[13]: Ca_0 . . . Ca_1 . . . Ca_f . . . Cb_0 . . . Cb_f + // VGPR[14]: Cc_0 . . . Cc_1 . . . Cc_f . . . Cd_0 . . . Cd_f + // VGPR[15]: Ce_0 .
. . Ce_1 . . . Ce_f . . . Cf_0 . . . Cf_f + // Row accumulator data is in finalized state and duplicated in each 16 lanes. + // Change row accumulator data lanes: + // 16 - 31 to [C1, C2, C3, C4, C5, C6, C7, C8, C9, Ca, Cb, Cc, Cd, Ce, Cf, XX]. + shuffleCtrl[1] = DppCtrl::DppRowSl1; + constexpr DppCtrl ctrl[] = {DppCtrl::DppRowShare0, DppCtrl::DppRowShare2, DppCtrl::DppRowShare4, + DppCtrl::DppRowShare6, DppCtrl::DppRowShare8, DppCtrl::DppRowShare10, + DppCtrl::DppRowShare12, DppCtrl::DppRowShare14}; + memcpy(expandCtrl, ctrl, sizeof(ctrl)); + } + } else + llvm_unreachable("unknow layout"); + + Value *rowAccShuffleVal = rowAccVal; + // Shuffle the data in each group of row accumulator data. wave64 have 4 x group16. + for (unsigned idx = 0; idx < 4; idx++) { + if (shuffleCtrl[idx] != DppCtrl(UINT32_MAX)) { + rowAccShuffleVal = + builder.CreateMapToSimpleType(mapFuncDpp, rowAccShuffleVal, + {builder.getInt32((unsigned)(shuffleCtrl[idx])), builder.getInt32(1 << idx), + builder.getInt32(0xF), builder.getInt1(true)}); + } + } + + for (unsigned idx = 0; idx < props.numFlatElements; idx++) { + assert(expandCtrl[idx] != DppCtrl(UINT32_MAX)); + Value *outputVal = + builder.CreateMapToSimpleType(mapFuncDpp, rowAccShuffleVal, + {builder.getInt32((unsigned)(expandCtrl[idx])), builder.getInt32(0xF), + builder.getInt32(0xF), builder.getInt1(true)}); + flatVec = builder.CreateInsertElement(flatVec, outputVal, idx); + } + } + Value *resultVal = convFlatVecToCoopMatrixVec(builder, flatVec, matrixElemType, matrixLayout); + + expand.replaceAllUsesWith(resultVal); + m_coopRowAccCalls.push_back(&expand); +} + +// ===================================================================================================================== +// Visit "CooperativeRowAccSumAccumulateOp" instruction +// +// @param inst : The dialect instruction to process +void LowerCooperativeMatrix::visitCooperativeRowAccSumAccumulateOp(CooperativeRowAccSumAccumulateOp &sumAccumulate) { + BuilderBase
builder(*m_context); + builder.SetInsertPoint(&sumAccumulate); + + auto matrixVal = sumAccumulate.getMatrix(); + auto matrixElemType = sumAccumulate.getMatrixElemType(); + auto matrixLayout = sumAccumulate.getMatrixLayout(); + Value *rowAccVal = sumAccumulate.getRowAcc(); + auto rowAccElemType = sumAccumulate.getRowAccElemType(); + auto isSigned = sumAccumulate.getIsSigned(); + + assert(matrixLayout == CooperativeMatrixLayout::FactorMatrixLayout); + + Value *vcFlat = convCoopMatrixVecToFlatVec(builder, matrixVal, matrixElemType, matrixLayout); + const unsigned numElems = cast(vcFlat->getType())->getNumElements(); + + Value *sumVal = rowAccVal; + if (matrixElemType == CooperativeMatrixElementType::Float16) { + assert(numElems % 2 == 0); + + // Use fdot2 for f32 accumulate. + if (rowAccElemType == CooperativeMatrixElementType::Float32) { + Value *constOne = builder.getFpConstant(builder.getHalfTy(), APFloat(1.0)); + Value *constVector = PoisonValue::get(FixedVectorType::get(builder.getHalfTy(), 2)); + constVector = builder.CreateInsertElement(constVector, constOne, uint64_t(0)); + constVector = builder.CreateInsertElement(constVector, constOne, 1); + + for (unsigned i = 0; i < numElems / 2; i++) { + Value *vector = builder.CreateShuffleVector(vcFlat, ArrayRef{int(i * 2), int(i * 2 + 1)}); + sumVal = + builder.CreateIntrinsic(Intrinsic::amdgcn_fdot2, {}, {vector, constVector, sumVal, builder.getFalse()}); + } + } else { + assert(rowAccElemType == CooperativeMatrixElementType::Float16); + for (unsigned i = 0; i < numElems; i++) { + auto val = builder.CreateExtractElement(vcFlat, i); + sumVal = builder.CreateFAdd(val, sumVal); + } + } + } else if (matrixElemType == CooperativeMatrixElementType::Int16) { + assert(rowAccElemType == CooperativeMatrixElementType::Int32); + assert(numElems % 2 == 0); + + // No dot2 for int16 on gfx11 + if (m_gfxIp.major >= 11) { + for (unsigned i = 0; i < numElems; i++) { + auto val = builder.CreateExtractElement(vcFlat, i); + if (isSigned) 
+ val = builder.CreateSExt(val, builder.getInt32Ty()); + else + val = builder.CreateZExt(val, builder.getInt32Ty()); + sumVal = builder.CreateAdd(val, sumVal); + } + } else { + Value *constVector = PoisonValue::get(FixedVectorType::get(builder.getInt16Ty(), 2)); + constVector = builder.CreateInsertElement(constVector, builder.getInt16(1), uint64_t(0)); + constVector = builder.CreateInsertElement(constVector, builder.getInt16(1), 1); + + for (unsigned i = 0; i < numElems / 2; i++) { + Value *vector = builder.CreateShuffleVector(vcFlat, ArrayRef{int(i * 2), int(i * 2 + 1)}); + sumVal = builder.CreateIntrinsic(isSigned ? Intrinsic::amdgcn_sdot2 : Intrinsic::amdgcn_udot2, {}, + {vector, constVector, sumVal, builder.getFalse()}); + } + } + } else if (matrixElemType == CooperativeMatrixElementType::Int8) { + assert(rowAccElemType == CooperativeMatrixElementType::Int32); + assert(numElems % 4 == 0); + + auto packedType = FixedVectorType::get(builder.getInt32Ty(), numElems / 4); + Value *vcPacked = builder.CreateBitCast(vcFlat, packedType); + + auto constPackedOne = builder.getInt32(0x01010101); + // Using dot4 intrinsic for accumulate. + for (unsigned i = 0; i < packedType->getNumElements(); i++) { + auto packedVal = builder.CreateExtractElement(vcPacked, i); + if (m_gfxIp.major >= 11) { + // Use sudot4 for gfx11+ + sumVal = builder.CreateIntrinsic(Intrinsic::amdgcn_sudot4, {}, + {builder.getInt1(isSigned), packedVal, builder.getInt1(isSigned), + constPackedOne, sumVal, builder.getFalse()}); + } else { + // Use sdot4 and udot4 for gfx10 + sumVal = builder.CreateIntrinsic(isSigned ? 
Intrinsic::amdgcn_sdot4 : Intrinsic::amdgcn_udot4, {}, + {packedVal, constPackedOne, sumVal, builder.getFalse()}); + } + } + } else + llvm_unreachable("not supported element type for CooperativeRowAccSumAccumulate"); + + sumAccumulate.replaceAllUsesWith(sumVal); + m_coopRowAccCalls.push_back(&sumAccumulate); +} + +// ===================================================================================================================== +// Visit "CooperativeRowAccScalarOp" instruction +// +// @param inst : The dialect instruction to process +void LowerCooperativeMatrix::visitCooperativeRowAccScalarOp(CooperativeRowAccScalarOp &scalar) { + BuilderBase builder(*m_context); + builder.SetInsertPoint(&scalar); + + auto elemType = scalar.getElemType(); + Value *rowAccVal = scalar.getRowAcc(); + Value *scalarVal = scalar.getScalar(); + auto coopMatArithOp = scalar.getBinop(); + bool accumulateMode = scalar.getAccumulateMode(); + + assert(builder.transCooperativeMatrixElementType(elemType) == rowAccVal->getType()); + assert(builder.transCooperativeMatrixElementType(elemType) == scalarVal->getType()); + + bool needHandleAccumulateMode = accumulateMode && (m_gfxIp.major >= 12); + + if (needHandleAccumulateMode) { + if (coopMatArithOp == CooperativeMatrixArithOp::FDiv || coopMatArithOp == CooperativeMatrixArithOp::IMul || + coopMatArithOp == CooperativeMatrixArithOp::FMul) { + // Assume above operation have same result in accumulate mode as ScalarOp(A + B, Scalar) = ScalarOp(A, Scalar) + + // ScalarOp(B, Scalar) + needHandleAccumulateMode = false; + } else if (coopMatArithOp == CooperativeMatrixArithOp::IAdd || coopMatArithOp == CooperativeMatrixArithOp::FAdd || + coopMatArithOp == CooperativeMatrixArithOp::ISub || coopMatArithOp == CooperativeMatrixArithOp::FSub) { + // Assume above operation have same result in accumulate mode as ScalarOp(A + B, Scalar) = ScalarOp(A, Scalar) + + // ScalarOp(B, 0) + // Make scalar value only valid part lanes as accumulate mode. 
+ scalarVal = cooperativeRowAccConvertToAccumulateMode(builder, getLaneNumber(builder), scalarVal, elemType); + needHandleAccumulateMode = false; + } + } + + if (needHandleAccumulateMode) + scalarVal = cooperativeRowAccConvertToFinalizeMode(builder, scalarVal, elemType); + + Value *resultVal = nullptr; + switch (coopMatArithOp) { + case CooperativeMatrixArithOp::IAdd: + resultVal = builder.CreateAdd(rowAccVal, scalarVal); + break; + case CooperativeMatrixArithOp::FAdd: + resultVal = builder.CreateFAdd(rowAccVal, scalarVal); + break; + case CooperativeMatrixArithOp::ISub: + resultVal = builder.CreateSub(rowAccVal, scalarVal); + break; + case CooperativeMatrixArithOp::FSub: + resultVal = builder.CreateFSub(rowAccVal, scalarVal); + break; + case CooperativeMatrixArithOp::IMul: + resultVal = builder.CreateMul(rowAccVal, scalarVal); + break; + case CooperativeMatrixArithOp::FMul: + resultVal = builder.CreateFMul(rowAccVal, scalarVal); + break; + case CooperativeMatrixArithOp::FDiv: + resultVal = builder.CreateFDiv(rowAccVal, scalarVal); + break; + case CooperativeMatrixArithOp::SDiv: + resultVal = builder.CreateSDiv(rowAccVal, scalarVal); + break; + case CooperativeMatrixArithOp::UDiv: + resultVal = builder.CreateUDiv(rowAccVal, scalarVal); + break; + default: + llvm_unreachable("unsupported binary operation for cooperative row acc!"); + } + + if (needHandleAccumulateMode) + resultVal = cooperativeRowAccConvertToAccumulateMode(builder, getLaneNumber(builder), resultVal, elemType); + + scalar.replaceAllUsesWith(resultVal); + m_coopRowAccCalls.push_back(&scalar); +} + // ===================================================================================================================== // Convert row acc to finalize mode by adding the interleave 16 lanes. 
// @@ -2131,7 +2444,7 @@ void LowerCooperativeMatrix::visitCooperativeRowAccFinalizeModeOp(CooperativeRow // @param rowAccVal : The cooperative rowAcc value // @param elemType : The component type of the rowAcc value Value *LowerCooperativeMatrix::cooperativeRowAccConvertToFinalizeMode(BuilderBase &builder, llvm::Value *rowAccVal, - Builder::CooperativeMatrixElementType elemType) { + CooperativeMatrixElementType elemType) { unsigned LaneSelBits[2] = {0x76543210, 0xfedcba98}; auto mapFuncX16 = [](BuilderBase &builder, ArrayRef mappedArgs, ArrayRef passthroughArgs) -> Value * { @@ -2151,16 +2464,16 @@ Value *LowerCooperativeMatrix::cooperativeRowAccConvertToFinalizeMode(BuilderBas {builder.getInt32(LaneSelBits[0]), builder.getInt32(LaneSelBits[1]), builder.getFalse(), builder.getFalse()}); switch (elemType) { - case Builder::CooperativeMatrixElementType::Float32: - case Builder::CooperativeMatrixElementType::Float16: + case CooperativeMatrixElementType::Float32: + case CooperativeMatrixElementType::Float16: rowAccVal = builder.CreateFAdd(rowAccVal, swapped); break; - case Builder::CooperativeMatrixElementType::Int32: + case CooperativeMatrixElementType::Int32: rowAccVal = builder.CreateAdd(rowAccVal, swapped); break; - case Builder::CooperativeMatrixElementType::Int8: - case Builder::CooperativeMatrixElementType::Int16: - case Builder::CooperativeMatrixElementType::Float16Packed: + case CooperativeMatrixElementType::Int8: + case CooperativeMatrixElementType::Int16: + case CooperativeMatrixElementType::Float16Packed: llvm_unreachable("not supported element type for row acc"); default: llvm_unreachable("unknown element type"); @@ -2176,24 +2489,23 @@ Value *LowerCooperativeMatrix::cooperativeRowAccConvertToFinalizeMode(BuilderBas // @param rowAccVal : The cooperative rowAcc value // @param threadId : The current lane index // @param elemType : The component type of the rowAcc value -Value * -LowerCooperativeMatrix::cooperativeRowAccConvertToAccumulateMode(BuilderBase 
&builder, llvm::Value *rowAccVal, - llvm::Value *threadId, - Builder::CooperativeMatrixElementType elemType) { +Value *LowerCooperativeMatrix::cooperativeRowAccConvertToAccumulateMode(BuilderBase &builder, llvm::Value *rowAccVal, + llvm::Value *threadId, + CooperativeMatrixElementType elemType) { Value *zero = nullptr; switch (elemType) { - case Builder::CooperativeMatrixElementType::Float32: + case CooperativeMatrixElementType::Float32: zero = builder.getFpConstant(builder.getFloatTy(), APFloat(0.0)); break; - case Builder::CooperativeMatrixElementType::Float16: + case CooperativeMatrixElementType::Float16: zero = builder.getFpConstant(builder.getHalfTy(), APFloat(0.0)); break; - case Builder::CooperativeMatrixElementType::Int32: + case CooperativeMatrixElementType::Int32: zero = builder.getInt32(0); break; - case Builder::CooperativeMatrixElementType::Int8: - case Builder::CooperativeMatrixElementType::Int16: - case Builder::CooperativeMatrixElementType::Float16Packed: + case CooperativeMatrixElementType::Int8: + case CooperativeMatrixElementType::Int16: + case CooperativeMatrixElementType::Float16Packed: llvm_unreachable("not supported element type for cooperative row acc"); default: llvm_unreachable("unknown element type"); @@ -2215,6 +2527,10 @@ void LowerCooperativeMatrix::processCoopRowAccFunction(Module &module) { .add(&LowerCooperativeMatrix::visitCooperativeRowAccStoreOp) .add(&LowerCooperativeMatrix::visitCooperativeRowAccAccumulateModeOp) .add(&LowerCooperativeMatrix::visitCooperativeRowAccFinalizeModeOp) + .add(&LowerCooperativeMatrix::visitCooperativeRowAccSplatOp) + .add(&LowerCooperativeMatrix::visitCooperativeRowAccExpandOp) + .add(&LowerCooperativeMatrix::visitCooperativeRowAccSumAccumulateOp) + .add(&LowerCooperativeMatrix::visitCooperativeRowAccScalarOp) .build(); visitor.visit(*this, module); diff --git a/lgc/patch/LowerGpuRt.cpp b/lgc/patch/LowerGpuRt.cpp index abcc8d2733..d60766e6ff 100644 --- a/lgc/patch/LowerGpuRt.cpp +++ 
b/lgc/patch/LowerGpuRt.cpp @@ -77,6 +77,7 @@ PreservedAnalyses LowerGpuRt::run(Module &module, ModuleAnalysisManager &analysi .add(&LowerGpuRt::visitGetFlattenedGroupThreadId) .add(&LowerGpuRt::visitFloatWithRoundMode) .add(&LowerGpuRt::visitGpurtDispatchThreadIdFlatOp) + .add(&LowerGpuRt::visitContinuationStackIsGlobalOp) .build(); visitor.visit(*this, module); @@ -444,4 +445,16 @@ void LowerGpuRt::visitGpurtDispatchThreadIdFlatOp(GpurtDispatchThreadIdFlatOp &i m_funcsToLower.insert(inst.getCalledFunction()); } +// ===================================================================================================================== +// Visit "GpurtContinuationStackIsGlobalOp" instruction +// +// @param inst : The dialect instruction to process +void LowerGpuRt::visitContinuationStackIsGlobalOp(GpurtContinuationStackIsGlobalOp &inst) { + m_builder->SetInsertPoint(&inst); + bool isGlobal = m_pipelineState->getOptions().cpsFlags & CpsFlagStackInGlobalMem; + inst.replaceAllUsesWith(m_builder->getInt1(isGlobal)); + m_callsToLower.push_back(&inst); + m_funcsToLower.insert(inst.getCalledFunction()); +} + } // namespace lgc diff --git a/lgc/patch/LowerSubgroupOps.cpp b/lgc/patch/LowerSubgroupOps.cpp index f22a74aff4..dcbca0fc48 100644 --- a/lgc/patch/LowerSubgroupOps.cpp +++ b/lgc/patch/LowerSubgroupOps.cpp @@ -30,28 +30,13 @@ */ #include "lgc/patch/LowerSubgroupOps.h" -#include "ShaderMerger.h" #include "lgc/LgcContext.h" #include "lgc/LgcDialect.h" -#include "lgc/builder/BuilderImpl.h" -#include "lgc/patch/ShaderInputs.h" -#include "lgc/state/AbiMetadata.h" -#include "lgc/state/AbiUnlinked.h" -#include "lgc/state/IntrinsDefs.h" -#include "lgc/state/PalMetadata.h" -#include "lgc/state/PipelineShaders.h" +#include "lgc/builder/SubgroupBuilder.h" #include "lgc/state/PipelineState.h" -#include "lgc/state/TargetInfo.h" -#include "lgc/util/AddressExtender.h" -#include "lgc/util/BuilderBase.h" #include "llvm-dialects/Dialect/Visitor.h" -#include "llvm/Analysis/AliasAnalysis.h" 
// for MemoryEffects -#include "llvm/IR/IntrinsicsAMDGPU.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include #define DEBUG_TYPE "lgc-lower-subgroup-ops" @@ -60,50 +45,6 @@ using namespace lgc; namespace lgc { -class SubgroupLoweringBuilder : public BuilderImpl { -public: - SubgroupLoweringBuilder(Pipeline *pipeline) : BuilderImpl(pipeline) {} - - // ===================================================================================================================== - // Create a subgroup elect. - // - // @param instName : Name to give instruction(s) - llvm::Value *CreateSubgroupElect(const llvm::Twine &instName = ""); - - // Create a subgroup any - // - // @param value : The value to compare - // @param instName : Name to give instruction(s) - llvm::Value *CreateSubgroupAny(llvm::Value *const value, const llvm::Twine &instName = ""); -}; - -// ===================================================================================================================== -// Create a subgroup elect call. -// -// @param instName : Name to give final instruction. -Value *SubgroupLoweringBuilder::CreateSubgroupElect(const Twine &instName) { - return CreateICmpEQ(CreateSubgroupMbcnt(createGroupBallot(getTrue()), ""), getInt32(0)); -} - -// ===================================================================================================================== -// Create a subgroup any call. -// -// @param value : The value to compare across the subgroup. Must be an integer type. -// @param instName : Name to give final instruction. 
-Value *SubgroupLoweringBuilder::CreateSubgroupAny(Value *const value, const Twine &instName) { - Value *result = CreateICmpNE(createGroupBallot(value), getInt64(0)); - result = CreateSelect(CreateUnaryIntrinsic(Intrinsic::is_constant, value), value, result); - - // Helper invocations of whole quad mode should be included in the subgroup vote execution - const auto &fragmentMode = m_pipelineState->getShaderModes()->getFragmentShaderMode(); - if (m_shaderStage == ShaderStage::Fragment && !fragmentMode.waveOpsExcludeHelperLanes) { - result = CreateZExt(result, getInt32Ty()); - result = CreateIntrinsic(Intrinsic::amdgcn_softwqm, {getInt32Ty()}, {result}); - result = CreateTrunc(result, getInt1Ty()); - } - return result; -} - // ===================================================================================================================== // Executes this LLVM patching pass on the specified LLVM module. // @@ -112,12 +53,11 @@ Value *SubgroupLoweringBuilder::CreateSubgroupAny(Value *const value, const Twin // @returns : The preserved analyses (The analyses that are still valid after this pass) PreservedAnalyses LowerSubgroupOps::run(Module &module, ModuleAnalysisManager &analysisManager) { PipelineState *pipelineState = analysisManager.getResult(module).getPipelineState(); - // PipelineShadersResult &pipelineShaders = analysisManager.getResult(module); LLVM_DEBUG(dbgs() << "Run the pass lower subgroup ops\n"); m_pipelineState = pipelineState; - SubgroupLoweringBuilder builder(m_pipelineState); + SubgroupBuilder builder(m_pipelineState); m_builder = &builder; static const auto visitor = llvm_dialects::VisitorBuilder() .setStrategy(llvm_dialects::VisitorStrategy::ByFunctionDeclaration) diff --git a/lgc/patch/NggPrimShader.cpp b/lgc/patch/NggPrimShader.cpp index e67492df1c..ba73df4900 100644 --- a/lgc/patch/NggPrimShader.cpp +++ b/lgc/patch/NggPrimShader.cpp @@ -550,7 +550,8 @@ Function *NggPrimShader::generate(Function *esMain, Function *gsMain, Function * uint64_t 
inRegMask = 0; auto primShaderTy = getPrimShaderType(inRegMask); - Function *primShader = Function::Create(primShaderTy, GlobalValue::ExternalLinkage, lgcName::NggPrimShaderEntryPoint); + Function *primShader = + createFunctionHelper(primShaderTy, GlobalValue::ExternalLinkage, module, lgcName::NggPrimShaderEntryPoint); primShader->setDLLStorageClass(GlobalValue::DLLExportStorageClass); const unsigned waveSize = m_pipelineState->getShaderWaveSize(ShaderStage::Geometry); primShader->addFnAttr("target-features", ",+wavefrontsize" + std::to_string(waveSize)); // Set wavefront size @@ -6124,9 +6125,10 @@ void NggPrimShader::processVertexAttribExport(Function *&target) { coherent.bits.glc = true; coherent.bits.slc = true; } - m_builder.CreateIntrinsic(Intrinsic::amdgcn_struct_buffer_store, attribValue->getType(), - {attribValue, attribRingBufDesc, vertexIndex, locationOffset, ringOffset, - m_builder.getInt32(coherent.u32All)}); + auto store = m_builder.CreateIntrinsic(Intrinsic::amdgcn_struct_buffer_store, attribValue->getType(), + {attribValue, attribRingBufDesc, vertexIndex, locationOffset, ringOffset, + m_builder.getInt32(coherent.u32All)}); + (void)store; removedCalls.push_back(call); } diff --git a/lgc/patch/Patch.cpp b/lgc/patch/Patch.cpp index e626b4364a..d8ae48f12a 100644 --- a/lgc/patch/Patch.cpp +++ b/lgc/patch/Patch.cpp @@ -30,7 +30,7 @@ */ #include "lgc/patch/Patch.h" #include "PatchNullFragShader.h" -#include "continuations/Continuations.h" +#include "llvmraytracing/Continuations.h" #include "lgc/LgcContext.h" #include "lgc/PassManager.h" #include "lgc/Pipeline.h" @@ -135,14 +135,33 @@ void Patch::addPasses(PipelineState *pipelineState, lgc::PassManager &passMgr, T if (patchTimer) LgcContext::createAndAddStartStopTimer(passMgr, patchTimer, true); + const auto indirectMode = pipelineState->getOptions().rtIndirectMode; + if (indirectMode == RayTracingIndirectMode::ContinuationsContinufy || + indirectMode == RayTracingIndirectMode::Continuations) { + if 
(indirectMode == RayTracingIndirectMode::ContinuationsContinufy) { + passMgr.addPass(Continufy()); + // NOTE: LowerGpuRt needs to be run before continuation transform for continufy mode because some GPURT dialects + // that continuation transform does not support are used. + passMgr.addPass(LowerGpuRt()); + } else { + passMgr.addPass(LowerRaytracingPipelinePass()); + } + + addLgcContinuationTransform(passMgr); + } + if (pipelineState->getOptions().useGpurt) { // NOTE: Lower GPURT operations and run InstCombinePass before builder replayer, because some Op are going to be // turned into constant value, so that we can eliminate unused `@lgc.load.buffer.desc` before getting into // replayer. Otherwise, unnecessary `writes_uavs` and `uses_uav` may be set. + // NOTE: Lower GPURT operations after continuations transform, because we will inline some functions from GPURT + // library which may use gpurt dialect, and the library itself doesn't run any LGC passes. passMgr.addPass(LowerGpuRt()); passMgr.addPass(createModuleToFunctionPassAdaptor(InstCombinePass())); } + // NOTE: Replay after continuations transform, because we will inline some functions from GPURT library which may use + // lgc record ops, and the library itself doesn't run any LGC passes. 
// We're using BuilderRecorder; replay the Builder calls now passMgr.addPass(BuilderReplayer()); passMgr.addPass(LowerSubgroupOps()); @@ -153,17 +172,6 @@ void Patch::addPasses(PipelineState *pipelineState, lgc::PassManager &passMgr, T "// LLPC pipeline before-patching results\n")); } - const auto indirectMode = pipelineState->getOptions().rtIndirectMode; - if (indirectMode == RayTracingIndirectMode::ContinuationsContinufy || - indirectMode == RayTracingIndirectMode::Continuations) { - if (indirectMode == RayTracingIndirectMode::ContinuationsContinufy) - passMgr.addPass(Continufy()); - else - passMgr.addPass(LowerRaytracingPipelinePass()); - - addLgcContinuationTransform(passMgr); - } - passMgr.addPass(IPSCCPPass()); passMgr.addPass(LowerDebugPrintf()); diff --git a/lgc/patch/PatchBufferOp.cpp b/lgc/patch/PatchBufferOp.cpp index 49b29dcb7d..34c904525e 100644 --- a/lgc/patch/PatchBufferOp.cpp +++ b/lgc/patch/PatchBufferOp.cpp @@ -1693,6 +1693,8 @@ Value *BufferOpLowering::createGlobalPointerAccess(Value *const bufferDesc, Valu // If null descriptor or extended robust buffer access is allowed, we will create a branch to perform normal global // access based on the valid check. Value *isValidAccess = m_builder.getTrue(); + BasicBlock *const origBlock = inst.getParent(); + Instruction *terminator = nullptr; if (m_pipelineState.getOptions().allowNullDescriptor || m_pipelineState.getOptions().enableExtendedRobustBufferAccess) { Value *isNonNullDesc = m_builder.getTrue(); @@ -1702,13 +1704,11 @@ Value *BufferOpLowering::createGlobalPointerAccess(Value *const bufferDesc, Valu } Value *isInBound = m_pipelineState.getOptions().enableExtendedRobustBufferAccess ? 
inBound : m_builder.getTrue(); isValidAccess = m_builder.CreateAnd(isNonNullDesc, isInBound); - } - - BasicBlock *const origBlock = inst.getParent(); - Instruction *const terminator = SplitBlockAndInsertIfThen(isValidAccess, &inst, false); + terminator = SplitBlockAndInsertIfThen(isValidAccess, &inst, false); + m_builder.SetInsertPoint(terminator); + } // Global pointer access - m_builder.SetInsertPoint(terminator); Value *baseAddr = getBaseAddressFromBufferDesc(bufferDesc); Value *newOffset = nullptr; if (m_pipelineState.getOptions().enableExtendedRobustBufferAccess) { @@ -1727,12 +1727,15 @@ Value *BufferOpLowering::createGlobalPointerAccess(Value *const bufferDesc, Valu // Store inst doesn't need return a value from a phi node if (!dyn_cast(&inst)) { + // Return early if the block is not split + if (!terminator) + return newValue; + m_builder.SetInsertPoint(&inst); assert(!type->isVoidTy()); auto phi = m_builder.CreatePHI(type, 2, "newValue"); phi->addIncoming(Constant::getNullValue(type), origBlock); phi->addIncoming(newValue, terminator->getParent()); - return phi; } return nullptr; diff --git a/lgc/patch/PatchCopyShader.cpp b/lgc/patch/PatchCopyShader.cpp index 524141b467..e083ddc79a 100644 --- a/lgc/patch/PatchCopyShader.cpp +++ b/lgc/patch/PatchCopyShader.cpp @@ -134,7 +134,8 @@ PreservedAnalyses PatchCopyShader::run(Module &module, ModuleAnalysisManager &an auto entryPointTy = FunctionType::get(builder.getVoidTy(), argTys, false); // Create function for the copy shader entrypoint, and insert it before the FS (if there is one). 
- auto entryPoint = Function::Create(entryPointTy, GlobalValue::ExternalLinkage, lgcName::CopyShaderEntryPoint); + auto entryPoint = + createFunctionHelper(entryPointTy, GlobalValue::ExternalLinkage, &module, lgcName::CopyShaderEntryPoint); entryPoint->setDLLStorageClass(GlobalValue::DLLExportStorageClass); entryPoint->setCallingConv(CallingConv::AMDGPU_VS); diff --git a/lgc/patch/PatchEntryPointMutate.cpp b/lgc/patch/PatchEntryPointMutate.cpp index fc677a67ea..7237eae3fb 100644 --- a/lgc/patch/PatchEntryPointMutate.cpp +++ b/lgc/patch/PatchEntryPointMutate.cpp @@ -205,8 +205,7 @@ static void splitIntoI32(const DataLayout &layout, IRBuilder<> &builder, ArrayRe Value *vecDword = builder.CreateBitCast(x, FixedVectorType::get(builder.getInt32Ty(), size / 32)); splitIntoI32(layout, builder, vecDword, output); } else { - if (!xType->isIntegerTy()) - x = builder.CreateBitCast(x, builder.getInt32Ty()); + x = builder.CreateZExtOrBitCast(x, builder.getInt32Ty()); output.push_back(x); } } @@ -711,7 +710,7 @@ Function *PatchEntryPointMutate::lowerCpsFunction(Function *func, ArrayRefgetFunctionType()->params().drop_front(1); newArgTys.append(remainingArgs.begin(), remainingArgs.end()); FunctionType *newFuncTy = FunctionType::get(builder.getVoidTy(), newArgTys, false); - auto newFunc = Function::Create(newFuncTy, func->getLinkage()); + auto newFunc = createFunctionHelper(newFuncTy, func->getLinkage(), func->getParent()); newFunc->copyAttributesFrom(func); newFunc->copyMetadata(func, 0); newFunc->takeName(func); @@ -1595,18 +1594,6 @@ uint64_t PatchEntryPointMutate::generateEntryPointArgTys(ShaderInputs *shaderInp } } - // NOTE: We encounter a HW defect on GFX9. When there is only one user SGPR (corresponds to global table, s0), - // the SGPR corresponding to scratch offset (s2) of PS is incorrectly initialized. This leads to invalid scratch - // memory access, causing GPU hang. Thus, we detect such case and add a dummy user SGPR in order not to map scratch - // offset to s2. 
- if (m_pipelineState->getTargetInfo().getGfxIpVersion().major == 9 && m_shaderStage == ShaderStage::Fragment) { - if (userDataIdx == 1) { - argTys.push_back(builder.getInt32Ty()); - argNames.push_back("dummyInit"); - userDataIdx += 1; - } - } - intfData->userDataCount = userDataIdx; inRegMask = (1ull << argTys.size()) - 1; @@ -2025,19 +2012,17 @@ PatchEntryPointMutate::UserDataUsage *PatchEntryPointMutate::getUserDataUsage(Sh // // @param stage : Shader stage ShaderStageEnum PatchEntryPointMutate::getMergedShaderStage(ShaderStageEnum stage) const { - if (m_pipelineState->getTargetInfo().getGfxIpVersion().major >= 9) { - switch (stage) { - case ShaderStage::Vertex: - if (m_pipelineState->hasShaderStage(ShaderStage::TessControl)) - return ShaderStage::TessControl; - LLVM_FALLTHROUGH; - case ShaderStage::TessEval: - if (m_pipelineState->hasShaderStage(ShaderStage::Geometry)) - return ShaderStage::Geometry; - break; - default: - break; - } + switch (stage) { + case ShaderStage::Vertex: + if (m_pipelineState->hasShaderStage(ShaderStage::TessControl)) + return ShaderStage::TessControl; + LLVM_FALLTHROUGH; + case ShaderStage::TessEval: + if (m_pipelineState->hasShaderStage(ShaderStage::Geometry)) + return ShaderStage::Geometry; + break; + default: + break; } return stage; } diff --git a/lgc/patch/PatchInOutImportExport.cpp b/lgc/patch/PatchInOutImportExport.cpp index 5afbe2b3a2..704f1fdedf 100644 --- a/lgc/patch/PatchInOutImportExport.cpp +++ b/lgc/patch/PatchInOutImportExport.cpp @@ -111,8 +111,8 @@ PreservedAnalyses PatchInOutImportExport::run(Module &module, ModuleAnalysisMana } // Create the global variable that is to model LDS - // NOTE: ES -> GS ring is always on-chip on GFX9. - if (m_hasTs || (m_hasGs && (m_pipelineState->isGsOnChip() || m_gfxIp.major >= 9))) + // NOTE: ES -> GS ring is always on-chip on GFX10+. 
+ if (m_hasTs || m_hasGs) m_lds = Patch::getLdsVariable(m_pipelineState, m_module); // Set buffer formats based on specific GFX @@ -259,8 +259,8 @@ void PatchInOutImportExport::processShader() { } } - // Thread ID will be used in on-chip GS offset calculation (ES -> GS ring is always on-chip on GFX9) - bool useThreadId = (m_hasGs && (m_pipelineState->isGsOnChip() || m_gfxIp.major >= 9)); + // Thread ID will be used in on-chip GS offset calculation (ES -> GS ring is always on-chip on GFX10+) + bool useThreadId = m_hasGs; // Thread ID will also be used for stream-out buffer export const bool enableXfb = m_pipelineState->enableXfb(); @@ -466,9 +466,10 @@ void PatchInOutImportExport::processShader() { bool isHwLocalInvocationId = cast(reconfigCall->getArgOperand(1))->getZExtValue(); if ((layout.microLayout == WorkgroupLayout::Quads) || (layout.macroLayout == WorkgroupLayout::SexagintiQuads)) { + BuilderBase builder(reconfigCall); localInvocationId = reconfigWorkgroupLayout(localInvocationId, layout.macroLayout, layout.microLayout, workgroupSizeX, - workgroupSizeY, workgroupSizeZ, isHwLocalInvocationId, reconfigCall); + workgroupSizeY, workgroupSizeZ, isHwLocalInvocationId, builder); } } reconfigCall->replaceAllUsesWith(localInvocationId); @@ -791,7 +792,7 @@ void PatchInOutImportExport::visitCallInst(CallInst &callInst) { Value *locOffset = outputImportGeneric.getLocOffset(); if (isa(locOffset)) { origLoc += cast(locOffset)->getZExtValue(); - locOffset = ConstantInt::get(Type::getInt32Ty(*m_context), 0); + locOffset = builder.getInt32(0); } // NOTE: For generic outputs of tessellation control shader, they could be per-patch ones. 
@@ -869,7 +870,7 @@ void PatchInOutImportExport::visitCallInst(CallInst &callInst) { switch (m_shaderStage) { case ShaderStage::Vertex: { - patchVsBuiltInOutputExport(output, builtInId, &callInst); + patchVsBuiltInOutputExport(output, builtInId, builder); break; } case ShaderStage::TessControl: { @@ -877,11 +878,11 @@ void PatchInOutImportExport::visitCallInst(CallInst &callInst) { Value *elemIdx = isDontCareValue(callInst.getOperand(1)) ? nullptr : callInst.getOperand(1); Value *vertexIdx = isDontCareValue(callInst.getOperand(2)) ? nullptr : callInst.getOperand(2); - patchTcsBuiltInOutputExport(output, builtInId, elemIdx, vertexIdx, &callInst); + patchTcsBuiltInOutputExport(output, builtInId, elemIdx, vertexIdx, builder); break; } case ShaderStage::TessEval: { - patchTesBuiltInOutputExport(output, builtInId, &callInst); + patchTesBuiltInOutputExport(output, builtInId, builder); break; } case ShaderStage::Geometry: { @@ -894,15 +895,15 @@ void PatchInOutImportExport::visitCallInst(CallInst &callInst) { Value *vertexOrPrimitiveIdx = callInst.getOperand(2); bool isPerPrimitive = cast(callInst.getOperand(3))->getZExtValue() != 0; - patchMeshBuiltInOutputExport(output, builtInId, elemIdx, vertexOrPrimitiveIdx, isPerPrimitive, &callInst); + patchMeshBuiltInOutputExport(output, builtInId, elemIdx, vertexOrPrimitiveIdx, isPerPrimitive, builder); break; } case ShaderStage::Fragment: { - patchFsBuiltInOutputExport(output, builtInId, &callInst); + patchFsBuiltInOutputExport(output, builtInId, builder); break; } case ShaderStage::CopyShader: { - patchCopyShaderBuiltInOutputExport(output, builtInId, &callInst); + patchCopyShaderBuiltInOutputExport(output, builtInId, builder); break; } default: { @@ -1035,7 +1036,7 @@ void PatchInOutImportExport::visitCallInst(CallInst &callInst) { break; } case ShaderStage::CopyShader: { - patchCopyShaderGenericOutputExport(output, loc, &callInst); + addExportInstForGenericOutput(output, loc, 0, builder); break; } default: { @@ -1105,11 
+1106,11 @@ void PatchInOutImportExport::visitReturnInst(ReturnInst &retInst) { m_shaderStage == ShaderStage::CopyShader) && (nextStage == ShaderStage::Invalid || nextStage == ShaderStage::Fragment)); - auto zero = ConstantFP::get(Type::getFloatTy(*m_context), 0.0); - auto one = ConstantFP::get(Type::getFloatTy(*m_context), 1.0); - auto poison = PoisonValue::get(Type::getFloatTy(*m_context)); + BuilderBase builder(&retInst); - Instruction *insertPos = &retInst; + auto zero = ConstantFP::get(builder.getFloatTy(), 0.0); + auto one = ConstantFP::get(builder.getFloatTy(), 1.0); + auto poison = PoisonValue::get(builder.getFloatTy()); const bool enableXfb = m_pipelineState->enableXfb(); if (m_shaderStage == ShaderStage::CopyShader && enableXfb) { @@ -1159,7 +1160,7 @@ void PatchInOutImportExport::visitReturnInst(ReturnInst &retInst) { for (auto &caseBranch : switchInst->cases()) { if (caseBranch.getCaseValue()->getZExtValue() == m_pipelineState->getRasterizerState().rasterStream) { // The insert position is updated to this case branch, before the terminator - insertPos = caseBranch.getCaseSuccessor()->getTerminator(); + builder.SetInsertPoint(caseBranch.getCaseSuccessor()->getTerminator()); updated = true; // We must go to return block from this case branch assert(caseBranch.getCaseSuccessor()->getSingleSuccessor() == retInst.getParent()); @@ -1244,16 +1245,16 @@ void PatchInOutImportExport::visitReturnInst(ReturnInst &retInst) { // NOTE: If gl_Position is not present in this shader stage, we have to export a dummy one. 
if (!usePosition) { Value *args[] = { - ConstantInt::get(Type::getInt32Ty(*m_context), EXP_TARGET_POS_0), // tgt - ConstantInt::get(Type::getInt32Ty(*m_context), 0xF), // en - zero, // src0 - zero, // src1 - zero, // src2 - one, // src3 - ConstantInt::get(Type::getInt1Ty(*m_context), false), // done - ConstantInt::get(Type::getInt1Ty(*m_context), false) // vm + builder.getInt32(EXP_TARGET_POS_0), // tgt + builder.getInt32(0xF), // en + zero, // src0 + zero, // src1 + zero, // src2 + one, // src3 + builder.getInt1(false), // done + builder.getInt1(false) // vm }; - emitCall("llvm.amdgcn.exp.f32", Type::getVoidTy(*m_context), args, {}, insertPos); + builder.CreateIntrinsic(builder.getVoidTy(), Intrinsic::amdgcn_exp, args); } // Export gl_ClipDistance[] and gl_CullDistance[] before entry-point returns @@ -1266,11 +1267,11 @@ void PatchInOutImportExport::visitReturnInst(ReturnInst &retInst) { // Extract elements of gl_ClipDistance[] and gl_CullDistance[] std::vector clipDistance; for (unsigned i = 0; i < clipDistanceCount; ++i) - clipDistance.push_back(ExtractValueInst::Create(m_clipDistance, {i}, "", insertPos)); + clipDistance.push_back(builder.CreateExtractValue(m_clipDistance, i)); std::vector cullDistance; for (unsigned i = 0; i < cullDistanceCount; ++i) - cullDistance.push_back(ExtractValueInst::Create(m_cullDistance, {i}, "", insertPos)); + cullDistance.push_back(builder.CreateExtractValue(m_cullDistance, i)); // Merge gl_ClipDistance[] and gl_CullDistance[] std::vector clipCullDistance; @@ -1295,31 +1296,31 @@ void PatchInOutImportExport::visitReturnInst(ReturnInst &retInst) { // NOTE: When misc. export is present, gl_ClipDistance[] or gl_CullDistance[] should start from pos2. unsigned pos = miscExport ? 
EXP_TARGET_POS_2 : EXP_TARGET_POS_1; Value *args[] = { - ConstantInt::get(Type::getInt32Ty(*m_context), pos), // tgt - ConstantInt::get(Type::getInt32Ty(*m_context), 0xF), // en - clipCullDistance[0], // src0 - clipCullDistance[1], // src1 - clipCullDistance[2], // src2 - clipCullDistance[3], // src3 - ConstantInt::get(Type::getInt1Ty(*m_context), false), // done - ConstantInt::get(Type::getInt1Ty(*m_context), false) // vm + builder.getInt32(pos), // tgt + builder.getInt32(0xF), // en + clipCullDistance[0], // src0 + clipCullDistance[1], // src1 + clipCullDistance[2], // src2 + clipCullDistance[3], // src3 + builder.getInt1(false), // done + builder.getInt1(false) // vm }; - emitCall("llvm.amdgcn.exp.f32", Type::getVoidTy(*m_context), args, {}, insertPos); + builder.CreateIntrinsic(builder.getVoidTy(), Intrinsic::amdgcn_exp, args); if (clipCullDistance.size() > 4) { // Do the second exporting Value *args[] = { - ConstantInt::get(Type::getInt32Ty(*m_context), pos + 1), // tgt - ConstantInt::get(Type::getInt32Ty(*m_context), 0xF), // en - clipCullDistance[4], // src0 - clipCullDistance[5], // src1 - clipCullDistance[6], // src2 - clipCullDistance[7], // src3 - ConstantInt::get(Type::getInt1Ty(*m_context), false), // done - ConstantInt::get(Type::getInt1Ty(*m_context), false) // vm + builder.getInt32(pos + 1), // tgt + builder.getInt32(0xF), // en + clipCullDistance[4], // src0 + clipCullDistance[5], // src1 + clipCullDistance[6], // src2 + clipCullDistance[7], // src3 + builder.getInt1(false), // done + builder.getInt1(false) // vm }; - emitCall("llvm.amdgcn.exp.f32", Type::getVoidTy(*m_context), args, {}, insertPos); + builder.CreateIntrinsic(builder.getVoidTy(), Intrinsic::amdgcn_exp, args); } // NOTE: We have to export gl_ClipDistance[] or gl_CullDistancep[] via generic outputs as well. 
@@ -1391,7 +1392,7 @@ void PatchInOutImportExport::visitReturnInst(ReturnInst &retInst) { const unsigned loc = builtInOutLocs.find(BuiltInPrimitiveId)->second; assert(m_primitiveId); - Value *primitiveId = new BitCastInst(m_primitiveId, Type::getFloatTy(*m_context), "", insertPos); + Value *primitiveId = builder.CreateBitCast(m_primitiveId, builder.getFloatTy()); recordVertexAttribExport(loc, {primitiveId, poison, poison, poison}); } @@ -1399,17 +1400,14 @@ void PatchInOutImportExport::visitReturnInst(ReturnInst &retInst) { // Export EdgeFlag if (useEdgeFlag) { - addExportInstForBuiltInOutput(m_edgeFlag, BuiltInEdgeFlag, insertPos); + addExportInstForBuiltInOutput(m_edgeFlag, BuiltInEdgeFlag, builder); } // Export gl_Layer and gl_ViewportIndex before entry-point returns - if (m_gfxIp.major >= 9 && (useLayer || useViewportIndex || enableMultiView)) { + if (useLayer || useViewportIndex || enableMultiView) { Value *viewportIndex = nullptr; Value *layer = nullptr; - Value *viewportIndexAndLayer = ConstantInt::get(Type::getInt32Ty(*m_context), 0); - - BuilderBase builder(*m_context); - builder.SetInsertPoint(insertPos); + Value *viewportIndexAndLayer = builder.getInt32(0); if (m_pipelineState->getInputAssemblyState().multiView == MultiViewMode::PerView) { assert(m_viewIndex); @@ -1443,20 +1441,20 @@ void PatchInOutImportExport::visitReturnInst(ReturnInst &retInst) { viewportIndexAndLayer = builder.CreateOr(viewportIndexAndLayer, layer); } - viewportIndexAndLayer = builder.CreateBitCast(viewportIndexAndLayer, Type::getFloatTy(*m_context)); + viewportIndexAndLayer = builder.CreateBitCast(viewportIndexAndLayer, builder.getFloatTy()); Value *args[] = { - ConstantInt::get(Type::getInt32Ty(*m_context), EXP_TARGET_POS_1), // tgt - ConstantInt::get(Type::getInt32Ty(*m_context), 0x4), // en - poison, // src0 - poison, // src1 - viewportIndexAndLayer, // src2 - poison, // src3 - ConstantInt::get(Type::getInt1Ty(*m_context), false), // done - 
ConstantInt::get(Type::getInt1Ty(*m_context), false) // vm + builder.getInt32(EXP_TARGET_POS_1), // tgt + builder.getInt32(0x4), // en + poison, // src0 + poison, // src1 + viewportIndexAndLayer, // src2 + poison, // src3 + builder.getInt1(false), // done + builder.getInt1(false) // vm }; - builder.CreateIntrinsic(Intrinsic::amdgcn_exp, Type::getFloatTy(*m_context), args, {}); + builder.CreateIntrinsic(Intrinsic::amdgcn_exp, builder.getFloatTy(), args, {}); // NOTE: We have to export gl_ViewportIndex via generic outputs as well. if (useViewportIndex) { @@ -1471,7 +1469,7 @@ void PatchInOutImportExport::visitReturnInst(ReturnInst &retInst) { assert(builtInOutLocs.find(BuiltInViewportIndex) != builtInOutLocs.end()); const unsigned loc = builtInOutLocs.find(BuiltInViewportIndex)->second; - Value *viewportIndex = builder.CreateBitCast(m_viewportIndex, Type::getFloatTy(*m_context)); + Value *viewportIndex = builder.CreateBitCast(m_viewportIndex, builder.getFloatTy()); recordVertexAttribExport(loc, {viewportIndex, poison, poison, poison}); } @@ -1490,7 +1488,7 @@ void PatchInOutImportExport::visitReturnInst(ReturnInst &retInst) { assert(builtInOutLocs.find(BuiltInLayer) != builtInOutLocs.end()); const unsigned loc = builtInOutLocs.find(BuiltInLayer)->second; - Value *layer = builder.CreateBitCast(m_layer, Type::getFloatTy(*m_context)); + Value *layer = builder.CreateBitCast(m_layer, builder.getFloatTy()); recordVertexAttribExport(loc, {layer, poison, poison, poison}); } @@ -1506,7 +1504,7 @@ void PatchInOutImportExport::visitReturnInst(ReturnInst &retInst) { } // Export vertex attributes that were recorded previously - exportVertexAttribs(insertPos); + exportVertexAttribs(builder); if (m_pipelineState->isUnlinked()) { // If we are building unlinked relocatable shaders, it is possible there are @@ -1524,23 +1522,23 @@ void PatchInOutImportExport::visitReturnInst(ReturnInst &retInst) { // NOTE: We will read back tessellation factors from on-chip LDS in later phases and 
write them to TF buffer. // Add fence and barrier before the return instruction to make sure they have been stored already. SyncScope::ID syncScope = m_context->getOrInsertSyncScopeID("workgroup"); - new FenceInst(*m_context, AtomicOrdering::Release, syncScope, insertPos); - emitCall("llvm.amdgcn.s.barrier", Type::getVoidTy(*m_context), {}, {}, insertPos); - new FenceInst(*m_context, AtomicOrdering::Acquire, syncScope, insertPos); + builder.CreateFence(AtomicOrdering::Release, syncScope); + builder.CreateIntrinsic(builder.getVoidTy(), Intrinsic::amdgcn_s_barrier, {}); + builder.CreateFence(AtomicOrdering::Acquire, syncScope); } else if (m_shaderStage == ShaderStage::Geometry) { if (m_gfxIp.major >= 10) { // NOTE: Per programming guide, we should do a "s_waitcnt 0,0,0 + s_waitcnt_vscnt 0" before issuing a "done", so // we use fence release to generate s_waitcnt vmcnt lgkmcnt/s_waitcnt_vscnt before s_sendmsg(MSG_GS_DONE) SyncScope::ID scope = m_pipelineState->isGsOnChip() ? m_context->getOrInsertSyncScopeID("workgroup") : SyncScope::System; - new FenceInst(*m_context, AtomicOrdering::Release, scope, insertPos); + builder.CreateFence(AtomicOrdering::Release, scope); } auto &entryArgIdxs = m_pipelineState->getShaderInterfaceData(ShaderStage::Geometry)->entryArgIdxs.gs; auto gsWaveId = getFunctionArgument(m_entryPoint, entryArgIdxs.gsWaveId); - Value *args[] = {ConstantInt::get(Type::getInt32Ty(*m_context), GsDone), gsWaveId}; + Value *args[] = {builder.getInt32(GsDone), gsWaveId}; - emitCall("llvm.amdgcn.s.sendmsg", Type::getVoidTy(*m_context), args, {}, insertPos); + builder.CreateIntrinsic(builder.getVoidTy(), Intrinsic::amdgcn_s_sendmsg, args); } else if (m_shaderStage == ShaderStage::Fragment) { // Fragment shader export are handled in LowerFragColorExport. 
return; @@ -1602,11 +1600,11 @@ Value *PatchInOutImportExport::patchGsGenericInputImport(Type *inputTy, unsigned compIdx *= 2; // For 64-bit data type, the component indexing must multiply by 2 // Cast 64-bit data type to float vector - inputTy = FixedVectorType::get(Type::getFloatTy(*m_context), compCount * 2); + inputTy = FixedVectorType::get(builder.getFloatTy(), compCount * 2); } else assert(bitWidth == 8 || bitWidth == 16 || bitWidth == 32); - Value *input = loadValueFromEsGsRing(inputTy, location, compIdx, vertexIdx, &*builder.GetInsertPoint()); + Value *input = loadValueFromEsGsRing(inputTy, location, compIdx, vertexIdx, builder); if (inputTy != origInputTy) { // Cast back to original input type @@ -1838,11 +1836,11 @@ Value *PatchInOutImportExport::patchFsGenericInputImport(Type *inputTy, unsigned Type *interpTy = nullptr; if (bitWidth == 8) { assert(inputTy->isIntOrIntVectorTy()); - interpTy = Type::getInt8Ty(*m_context); + interpTy = builder.getInt8Ty(); } else if (bitWidth == 16) - interpTy = Type::getHalfTy(*m_context); + interpTy = builder.getHalfTy(); else - interpTy = Type::getFloatTy(*m_context); + interpTy = builder.getFloatTy(); if (numChannels > 1) interpTy = FixedVectorType::get(interpTy, numChannels); Value *interp = PoisonValue::get(interpTy); @@ -1963,14 +1961,14 @@ void PatchInOutImportExport::patchVsGenericOutputExport(Value *output, unsigned unsigned compCount = outputTy->isVectorTy() ? 
cast(outputTy)->getNumElements() * 2 : 2; - outputTy = FixedVectorType::get(Type::getFloatTy(*m_context), compCount); + outputTy = FixedVectorType::get(builder.getFloatTy(), compCount); output = builder.CreateBitCast(output, outputTy); } else assert(bitWidth == 8 || bitWidth == 16 || bitWidth == 32); - storeValueToEsGsRing(output, location, compIdx, &*builder.GetInsertPoint()); + storeValueToEsGsRing(output, location, compIdx, builder); } else - addExportInstForGenericOutput(output, location, compIdx, &*builder.GetInsertPoint()); + addExportInstForGenericOutput(output, location, compIdx, builder); } } @@ -2010,15 +2008,15 @@ void PatchInOutImportExport::patchTesGenericOutputExport(Value *output, unsigned compIdx *= 2; unsigned compCount = outputTy->isVectorTy() ? cast(outputTy)->getNumElements() * 2 : 2; - outputTy = FixedVectorType::get(Type::getFloatTy(*m_context), compCount); + outputTy = FixedVectorType::get(builder.getFloatTy(), compCount); output = builder.CreateBitCast(output, outputTy); } else assert(bitWidth == 8 || bitWidth == 16 || bitWidth == 32); - storeValueToEsGsRing(output, location, compIdx, &*builder.GetInsertPoint()); + storeValueToEsGsRing(output, location, compIdx, builder); } else - addExportInstForGenericOutput(output, location, compIdx, &*builder.GetInsertPoint()); + addExportInstForGenericOutput(output, location, compIdx, builder); } // ===================================================================================================================== @@ -2040,10 +2038,9 @@ void PatchInOutImportExport::patchGsGenericOutputExport(Value *output, unsigned compIdx *= 2; if (outputTy->isVectorTy()) - outputTy = - FixedVectorType::get(Type::getFloatTy(*m_context), cast(outputTy)->getNumElements() * 2); + outputTy = FixedVectorType::get(builder.getFloatTy(), cast(outputTy)->getNumElements() * 2); else - outputTy = FixedVectorType::get(Type::getFloatTy(*m_context), 2); + outputTy = FixedVectorType::get(builder.getFloatTy(), 2); output = 
builder.CreateBitCast(output, outputTy); } else @@ -2137,10 +2134,10 @@ Value *PatchInOutImportExport::patchTcsBuiltInInputImport(Type *inputTy, unsigne auto elemTy = inputTy->getArrayElementType(); for (unsigned i = 0; i < inputTy->getArrayNumElements(); ++i) { - auto elemIdx = ConstantInt::get(Type::getInt32Ty(*m_context), i); + auto elemIdx = builder.getInt32(i); auto ldsOffset = calcLdsOffsetForTcsInput(elemTy, loc, nullptr, elemIdx, vertexIdx, builder); auto elem = readValueFromLds(false, elemTy, ldsOffset, builder); - builder.CreateInsertValue(input, elem, {i}); + builder.CreateInsertValue(input, elem, i); } } else { auto ldsOffset = calcLdsOffsetForTcsInput(inputTy, loc, nullptr, elemIdx, vertexIdx, builder); @@ -2150,7 +2147,7 @@ Value *PatchInOutImportExport::patchTcsBuiltInInputImport(Type *inputTy, unsigne break; } case BuiltInPatchVertices: { - input = ConstantInt::get(Type::getInt32Ty(*m_context), m_pipelineState->getNumPatchControlPoints()); + input = builder.getInt32(m_pipelineState->getNumPatchControlPoints()); break; } case BuiltInPrimitiveId: { @@ -2329,7 +2326,7 @@ Value *PatchInOutImportExport::patchGsBuiltInInputImport(Type *inputTy, unsigned assert(inOutUsage.builtInInputLocMap.find(builtInId) != inOutUsage.builtInInputLocMap.end()); const unsigned loc = inOutUsage.builtInInputLocMap.find(builtInId)->second; assert(loc != InvalidValue); - input = loadValueFromEsGsRing(inputTy, loc, 0, vertexIdx, &*builder.GetInsertPoint()); + input = loadValueFromEsGsRing(inputTy, loc, 0, vertexIdx, builder); break; } case BuiltInPrimitiveId: { @@ -2649,7 +2646,7 @@ Value *PatchInOutImportExport::patchFsBuiltInInputImport(Type *inputTy, unsigned // gl_ShadingRate is not supported on pre-GFX10.3 assert(m_gfxIp >= GfxIpVersion({10, 3})); - input = getShadingRate(&*builder.GetInsertPoint()); + input = getShadingRate(builder); break; } // Handle internal-use built-ins for sample position emulation @@ -2856,7 +2853,7 @@ Value 
*PatchInOutImportExport::patchTcsBuiltInOutputImport(Type *outputTy, unsig // Import the whole tessLevel array for (unsigned i = 0; i < outputTy->getArrayNumElements(); ++i) { Value *ldsOffset = builder.CreateAdd(baseOffset, builder.getInt32(tessFactorStart + i)); - auto elem = readValueFromLds(false, Type::getFloatTy(*m_context), ldsOffset, builder); + auto elem = readValueFromLds(false, builder.getFloatTy(), ldsOffset, builder); output = builder.CreateInsertValue(output, elem, {i}); } } else { @@ -2882,10 +2879,8 @@ Value *PatchInOutImportExport::patchTcsBuiltInOutputImport(Type *outputTy, unsig // // @param output : Output value // @param builtInId : ID of the built-in variable -// @param insertPos : Where to insert the patch instruction -void PatchInOutImportExport::patchVsBuiltInOutputExport(Value *output, unsigned builtInId, Instruction *insertPos) { - BuilderBase builder(insertPos); - +// @param builder : the builder to use +void PatchInOutImportExport::patchVsBuiltInOutputExport(Value *output, unsigned builtInId, BuilderBase &builder) { auto outputTy = output->getType(); const auto resUsage = m_pipelineState->getShaderResourceUsage(ShaderStage::Vertex); @@ -2916,9 +2911,9 @@ void PatchInOutImportExport::patchVsBuiltInOutputExport(Value *output, unsigned assert(builtInOutLocMap.find(builtInId) != builtInOutLocMap.end()); unsigned loc = builtInOutLocMap.find(builtInId)->second; - storeValueToEsGsRing(output, loc, 0, insertPos); + storeValueToEsGsRing(output, loc, 0, builder); } else - addExportInstForBuiltInOutput(output, builtInId, insertPos); + addExportInstForBuiltInOutput(output, builtInId, builder); } break; @@ -2947,18 +2942,17 @@ void PatchInOutImportExport::patchVsBuiltInOutputExport(Value *output, unsigned auto ldsOffset = calcLdsOffsetForVsOutput(outputTy->getArrayElementType(), loc, 0, builder); for (unsigned i = 0; i < outputTy->getArrayNumElements(); ++i) { - auto elem = ExtractValueInst::Create(output, {i}, "", insertPos); + auto elem = 
builder.CreateExtractValue(output, {i}); writeValueToLds(false, elem, ldsOffset, builder); - ldsOffset = - BinaryOperator::CreateAdd(ldsOffset, ConstantInt::get(Type::getInt32Ty(*m_context), 1), "", insertPos); + ldsOffset = builder.CreateAdd(ldsOffset, builder.getInt32(1)); } } else { if (m_hasGs) { assert(builtInOutLocMap.find(builtInId) != builtInOutLocMap.end()); unsigned loc = builtInOutLocMap.find(builtInId)->second; - storeValueToEsGsRing(output, loc, 0, insertPos); + storeValueToEsGsRing(output, loc, 0, builder); } else { // NOTE: The export of gl_{Clip,Cull}Distance[] is delayed and is done before entry-point returns. if (builtInId == BuiltInClipDistance) @@ -2987,7 +2981,7 @@ void PatchInOutImportExport::patchVsBuiltInOutputExport(Value *output, unsigned assert(builtInOutLocMap.find(builtInId) != builtInOutLocMap.end()); unsigned loc = builtInOutLocMap.find(builtInId)->second; - storeValueToEsGsRing(output, loc, 0, insertPos); + storeValueToEsGsRing(output, loc, 0, builder); } break; @@ -3009,7 +3003,7 @@ void PatchInOutImportExport::patchVsBuiltInOutputExport(Value *output, unsigned assert(builtInOutLocMap.find(builtInId) != builtInOutLocMap.end()); unsigned loc = builtInOutLocMap.find(builtInId)->second; - storeValueToEsGsRing(output, loc, 0, insertPos); + storeValueToEsGsRing(output, loc, 0, builder); } break; @@ -3022,7 +3016,7 @@ void PatchInOutImportExport::patchVsBuiltInOutputExport(Value *output, unsigned if (!m_hasTs && !m_hasGs) { // gl_PrimitiveShadingRate is not supported on pre-GFX10.3 assert(m_gfxIp >= GfxIpVersion({10, 3})); - addExportInstForBuiltInOutput(output, builtInId, insertPos); + addExportInstForBuiltInOutput(output, builtInId, builder); } break; @@ -3047,11 +3041,9 @@ void PatchInOutImportExport::patchVsBuiltInOutputExport(Value *output, unsigned // @param builtInId : ID of the built-in variable // @param elemIdx : Index used for array/vector element indexing (could be null) // @param vertexIdx : Output array outermost index used 
for vertex indexing (could be null) -// @param insertPos : Where to insert the patch instruction +// @param builder : the builder to use void PatchInOutImportExport::patchTcsBuiltInOutputExport(Value *output, unsigned builtInId, Value *elemIdx, - Value *vertexIdx, Instruction *insertPos) { - BuilderBase builder(insertPos); - + Value *vertexIdx, BuilderBase &builder) { auto outputTy = output->getType(); const auto resUsage = m_pipelineState->getShaderResourceUsage(ShaderStage::TessControl); @@ -3094,8 +3086,8 @@ void PatchInOutImportExport::patchTcsBuiltInOutputExport(Value *output, unsigned assert(outputTy->isArrayTy()); for (unsigned i = 0; i < outputTy->getArrayNumElements(); ++i) { - auto elem = ExtractValueInst::Create(output, {i}, "", insertPos); - auto elemIdx = ConstantInt::get(Type::getInt32Ty(*m_context), i); + auto elem = builder.CreateExtractValue(output, i); + auto elemIdx = builder.getInt32(i); auto ldsOffset = calcLdsOffsetForTcsOutput(elem->getType(), loc, nullptr, elemIdx, vertexIdx, builder); writeValueToLds(true, elem, ldsOffset, builder); } @@ -3129,7 +3121,7 @@ void PatchInOutImportExport::patchTcsBuiltInOutputExport(Value *output, unsigned } else { // Handle a single element of tessLevelOuter array Value *ldsOffset = builder.CreateAdd(baseOffset, builder.getInt32(tessFactorStart)); - ldsOffset = builder.CreateAdd(ldsOffset, elemIdx, "", insertPos); + ldsOffset = builder.CreateAdd(ldsOffset, elemIdx); writeValueToLds(false, output, ldsOffset, builder); } @@ -3165,8 +3157,8 @@ void PatchInOutImportExport::patchTcsBuiltInOutputExport(Value *output, unsigned // // @param output : Output value // @param builtInId : ID of the built-in variable -// @param insertPos : Where to insert the patch instruction -void PatchInOutImportExport::patchTesBuiltInOutputExport(Value *output, unsigned builtInId, Instruction *insertPos) { +// @param builder : the builder to use +void PatchInOutImportExport::patchTesBuiltInOutputExport(Value *output, unsigned builtInId, 
BuilderBase &builder) { const auto resUsage = m_pipelineState->getShaderResourceUsage(ShaderStage::TessEval); auto &builtInUsage = resUsage->builtInUsage.tes; const auto &builtInOutLocMap = resUsage->inOutUsage.builtInOutputLocMap; @@ -3207,12 +3199,12 @@ void PatchInOutImportExport::patchTesBuiltInOutputExport(Value *output, unsigned assert(builtInOutLocMap.find(builtInId) != builtInOutLocMap.end()); unsigned loc = builtInOutLocMap.find(builtInId)->second; - storeValueToEsGsRing(output, loc, 0, insertPos); + storeValueToEsGsRing(output, loc, 0, builder); } else { switch (builtInId) { case BuiltInPosition: case BuiltInPointSize: - addExportInstForBuiltInOutput(output, builtInId, insertPos); + addExportInstForBuiltInOutput(output, builtInId, builder); break; case BuiltInClipDistance: // NOTE: The export of gl_ClipDistance[] is delayed and is done before entry-point returns. @@ -3241,7 +3233,7 @@ void PatchInOutImportExport::patchTesBuiltInOutputExport(Value *output, unsigned assert(builtInOutLocMap.find(builtInId) != builtInOutLocMap.end()); unsigned loc = builtInOutLocMap.find(builtInId)->second; - storeValueToEsGsRing(output, loc, 0, insertPos); + storeValueToEsGsRing(output, loc, 0, builder); } break; @@ -3258,7 +3250,7 @@ void PatchInOutImportExport::patchTesBuiltInOutputExport(Value *output, unsigned assert(builtInOutLocMap.find(builtInId) != builtInOutLocMap.end()); unsigned loc = builtInOutLocMap.find(builtInId)->second; - storeValueToEsGsRing(output, loc, 0, insertPos); + storeValueToEsGsRing(output, loc, 0, builder); } break; @@ -3328,13 +3320,10 @@ void PatchInOutImportExport::patchGsBuiltInOutputExport(Value *output, unsigned // @param elemIdx : Index used for array/vector element indexing (could be null) // @param vertexOrPrimitiveIdx : Output array outermost index used for vertex or primitive indexing // @param isPerPrimitive : Whether the output is per-primitive -// @param insertPos : Where to insert the patch instruction +// @param builder : the 
builder to use void PatchInOutImportExport::patchMeshBuiltInOutputExport(Value *output, unsigned builtInId, Value *elemIdx, Value *vertexOrPrimitiveIdx, bool isPerPrimitive, - Instruction *insertPos) { - BuilderBase builder(*m_context); - builder.SetInsertPoint(insertPos); - + BuilderBase &builder) { // Handle primitive indices built-ins if (builtInId == BuiltInPrimitivePointIndices || builtInId == BuiltInPrimitiveLineIndices || builtInId == BuiltInPrimitiveTriangleIndices) { @@ -3430,8 +3419,8 @@ void PatchInOutImportExport::patchMeshBuiltInOutputExport(Value *output, unsigne // // @param output : Output value // @param builtInId : ID of the built-in variable -// @param insertPos : Where to insert the patch instruction -void PatchInOutImportExport::patchFsBuiltInOutputExport(Value *output, unsigned builtInId, Instruction *insertPos) { +// @param builder : the builder to use +void PatchInOutImportExport::patchFsBuiltInOutputExport(Value *output, unsigned builtInId, BuilderBase &builder) { switch (builtInId) { case BuiltInFragDepth: { m_fragDepth = output; @@ -3441,12 +3430,12 @@ void PatchInOutImportExport::patchFsBuiltInOutputExport(Value *output, unsigned assert(output->getType()->isArrayTy()); // NOTE: Only gl_SampleMask[0] is valid for us. 
- m_sampleMask = ExtractValueInst::Create(output, {0}, "", insertPos); - m_sampleMask = new BitCastInst(m_sampleMask, Type::getFloatTy(*m_context), "", insertPos); + m_sampleMask = builder.CreateExtractValue(output, 0); + m_sampleMask = builder.CreateBitCast(m_sampleMask, builder.getFloatTy()); break; } case BuiltInFragStencilRef: { - m_fragStencilRef = new BitCastInst(output, Type::getFloatTy(*m_context), "", insertPos); + m_fragStencilRef = builder.CreateBitCast(output, builder.getFloatTy()); break; } default: { @@ -3456,29 +3445,18 @@ void PatchInOutImportExport::patchFsBuiltInOutputExport(Value *output, unsigned } } -// ===================================================================================================================== -// Patches export calls for generic outputs of copy shader. -// -// @param output : Output value -// @param location : Location of the output -// @param insertPos : Where to insert the patch instruction -void PatchInOutImportExport::patchCopyShaderGenericOutputExport(Value *output, unsigned location, - Instruction *insertPos) { - addExportInstForGenericOutput(output, location, 0, insertPos); -} - // ===================================================================================================================== // Patches export calls for built-in outputs of copy shader. 
// // @param output : Output value // @param builtInId : ID of the built-in variable -// @param insertPos : Where to insert the patch instruction +// @param builder : the builder to use void PatchInOutImportExport::patchCopyShaderBuiltInOutputExport(Value *output, unsigned builtInId, - Instruction *insertPos) { + BuilderBase &builder) { switch (builtInId) { case BuiltInPosition: case BuiltInPointSize: { - addExportInstForBuiltInOutput(output, builtInId, insertPos); + addExportInstForBuiltInOutput(output, builtInId, builder); break; } case BuiltInClipDistance: { @@ -3514,7 +3492,7 @@ void PatchInOutImportExport::patchCopyShaderBuiltInOutputExport(Value *output, u case BuiltInPrimitiveShadingRate: { // gl_PrimitiveShadingRate is not supported on pre-GFX10.3 assert(m_gfxIp >= GfxIpVersion({10, 3})); - addExportInstForBuiltInOutput(output, builtInId, insertPos); + addExportInstForBuiltInOutput(output, builtInId, builder); break; } @@ -3602,10 +3580,10 @@ unsigned PatchInOutImportExport::combineBufferStore(const std::vector & unsigned valueOffset, Value *bufDesc, Value *storeOffset, Value *bufBase, CoherentFlag coherent, BuilderBase &builder) { Type *storeTys[4] = { - Type::getInt32Ty(*m_context), - FixedVectorType::get(Type::getInt32Ty(*m_context), 2), - FixedVectorType::get(Type::getInt32Ty(*m_context), 3), - FixedVectorType::get(Type::getInt32Ty(*m_context), 4), + builder.getInt32Ty(), + FixedVectorType::get(builder.getInt32Ty(), 2), + FixedVectorType::get(builder.getInt32Ty(), 3), + FixedVectorType::get(builder.getInt32Ty(), 4), }; std::string funcName = "llvm.amdgcn.raw.tbuffer.store."; @@ -3617,7 +3595,7 @@ unsigned PatchInOutImportExport::combineBufferStore(const std::vector & funcName += getTypeName(storeTys[compCount - 1]); Value *storeValue = nullptr; if (compCount > 1) { - auto storeTy = FixedVectorType::get(Type::getInt32Ty(*m_context), compCount); + auto storeTy = FixedVectorType::get(builder.getInt32Ty(), compCount); storeValue = PoisonValue::get(storeTy); 
for (unsigned i = 0; i < compCount; ++i) { @@ -3658,10 +3636,10 @@ unsigned PatchInOutImportExport::combineBufferLoad(std::vector &loadVal Value *loadOffset, Value *bufBase, CoherentFlag coherent, BuilderBase &builder) { Type *loadTyps[4] = { - Type::getInt32Ty(*m_context), - FixedVectorType::get(Type::getInt32Ty(*m_context), 2), - FixedVectorType::get(Type::getInt32Ty(*m_context), 3), - FixedVectorType::get(Type::getInt32Ty(*m_context), 4), + builder.getInt32Ty(), + FixedVectorType::get(builder.getInt32Ty(), 2), + FixedVectorType::get(builder.getInt32Ty(), 3), + FixedVectorType::get(builder.getInt32Ty(), 4), }; std::string funcName = "llvm.amdgcn.raw.tbuffer.load."; @@ -3826,9 +3804,9 @@ void PatchInOutImportExport::storeValueToStreamOutBuffer(Value *storeValue, unsi // @param storeValue : Value to store // @param location : Output location // @param compIdx : Output component index -// @param insertPos : Where to insert the store instruction +// @param builder : the builder to use void PatchInOutImportExport::storeValueToEsGsRing(Value *storeValue, unsigned location, unsigned compIdx, - Instruction *insertPos) { + BuilderBase &builder) { auto storeTy = storeValue->getType(); Type *elemTy = storeTy; @@ -3847,26 +3825,25 @@ void PatchInOutImportExport::storeValueToEsGsRing(Value *storeValue, unsigned lo for (unsigned i = 0; i < elemCount; ++i) { Value *storeElem = nullptr; if (storeTy->isArrayTy()) - storeElem = ExtractValueInst::Create(storeValue, {i}, "", insertPos); + storeElem = builder.CreateExtractValue(storeValue, i); else { - storeElem = - ExtractElementInst::Create(storeValue, ConstantInt::get(Type::getInt32Ty(*m_context), i), "", insertPos); + storeElem = builder.CreateExtractElement(storeValue, builder.getInt32(i)); } - storeValueToEsGsRing(storeElem, location + (compIdx + i) / 4, (compIdx + i) % 4, insertPos); + storeValueToEsGsRing(storeElem, location + (compIdx + i) / 4, (compIdx + i) % 4, builder); } } else { if (bitWidth == 8 || bitWidth == 16) { if 
(storeTy->isFloatingPointTy()) { assert(bitWidth == 16); - storeValue = new BitCastInst(storeValue, Type::getInt16Ty(*m_context), "", insertPos); + storeValue = builder.CreateBitCast(storeValue, builder.getInt16Ty()); } - storeValue = new ZExtInst(storeValue, Type::getInt32Ty(*m_context), "", insertPos); + storeValue = builder.CreateZExt(storeValue, builder.getInt32Ty()); } else { assert(bitWidth == 32); if (storeTy->isFloatingPointTy()) - storeValue = new BitCastInst(storeValue, Type::getInt32Ty(*m_context), "", insertPos); + storeValue = builder.CreateBitCast(storeValue, builder.getInt32Ty()); } // Call buffer store intrinsic or LDS store @@ -3879,36 +3856,13 @@ void PatchInOutImportExport::storeValueToEsGsRing(Value *storeValue, unsigned lo esGsOffset = getFunctionArgument(m_entryPoint, entryArgIdxs.tes.esGsOffset); } - auto ringOffset = calcEsGsRingOffsetForOutput(location, compIdx, esGsOffset, insertPos); - - if (m_pipelineState->isGsOnChip() || m_gfxIp.major >= 9) // ES -> GS ring is always on-chip on GFX9+ - { - Value *idxs[] = {ConstantInt::get(Type::getInt32Ty(*m_context), 0), ringOffset}; - auto ldsType = m_lds->getValueType(); - Value *storePtr = GetElementPtrInst::Create(ldsType, m_lds, idxs, "", insertPos); - new StoreInst(storeValue, storePtr, false, m_lds->getAlign().value(), insertPos); - } else { - Value *esGsRingBufDesc = m_pipelineSysValues.get(m_entryPoint)->getEsGsRingBufDesc(); + auto ringOffset = calcEsGsRingOffsetForOutput(location, compIdx, esGsOffset, builder); - // NOTE: Here we use tbuffer_store instruction instead of buffer_store because we have to do explicit control - // of soffset. This is required by swizzle enabled mode when address range checking should be complied with. 
- CombineFormat combineFormat = {}; - combineFormat.bits.dfmt = BUF_DATA_FORMAT_32; - combineFormat.bits.nfmt = BUF_NUM_FORMAT_UINT; - CoherentFlag coherent = {}; - coherent.bits.glc = true; - coherent.bits.slc = true; - coherent.bits.swz = true; - Value *args[] = { - storeValue, // vdata - esGsRingBufDesc, // rsrc - ringOffset, // voffset - esGsOffset, // soffset - ConstantInt::get(Type::getInt32Ty(*m_context), combineFormat.u32All), - ConstantInt::get(Type::getInt32Ty(*m_context), coherent.u32All) // glc, slc, swz - }; - emitCall("llvm.amdgcn.raw.tbuffer.store.i32", Type::getVoidTy(*m_context), args, {}, insertPos); - } + // ES -> GS ring is always on-chip on GFX10+ + Value *idxs[] = {builder.getInt32(0), ringOffset}; + auto ldsType = m_lds->getValueType(); + Value *storePtr = builder.CreateGEP(ldsType, m_lds, idxs); + builder.CreateAlignedStore(storeValue, storePtr, m_lds->getAlign().value()); } } @@ -3919,9 +3873,9 @@ void PatchInOutImportExport::storeValueToEsGsRing(Value *storeValue, unsigned lo // @param location : Input location // @param compIdx : Input component index // @param vertexIdx : Vertex index -// @param insertPos : Where to insert the load instruction +// @param builder : the builder to use Value *PatchInOutImportExport::loadValueFromEsGsRing(Type *loadTy, unsigned location, unsigned compIdx, - Value *vertexIdx, Instruction *insertPos) { + Value *vertexIdx, BuilderBase &builder) { Type *elemTy = loadTy; if (loadTy->isArrayTy()) elemTy = cast(loadTy)->getElementType(); @@ -3929,6 +3883,7 @@ Value *PatchInOutImportExport::loadValueFromEsGsRing(Type *loadTy, unsigned loca elemTy = cast(loadTy)->getElementType(); const uint64_t bitWidth = elemTy->getScalarSizeInBits(); + (void)bitWidth; // unused in release builds assert((elemTy->isFloatingPointTy() || elemTy->isIntegerTy()) && (bitWidth == 8 || bitWidth == 16 || bitWidth == 32)); Value *loadValue = PoisonValue::get(loadTy); @@ -3939,63 +3894,21 @@ Value 
*PatchInOutImportExport::loadValueFromEsGsRing(Type *loadTy, unsigned loca for (unsigned i = 0; i < elemCount; ++i) { auto loadElem = - loadValueFromEsGsRing(elemTy, location + (compIdx + i) / 4, (compIdx + i) % 4, vertexIdx, insertPos); + loadValueFromEsGsRing(elemTy, location + (compIdx + i) / 4, (compIdx + i) % 4, vertexIdx, builder); if (loadTy->isArrayTy()) - loadValue = InsertValueInst::Create(loadValue, loadElem, {i}, "", insertPos); + loadValue = builder.CreateInsertValue(loadValue, loadElem, i); else { - loadValue = InsertElementInst::Create(loadValue, loadElem, ConstantInt::get(Type::getInt32Ty(*m_context), i), - "", insertPos); + loadValue = builder.CreateInsertElement(loadValue, loadElem, i); } } } else { - Value *ringOffset = calcEsGsRingOffsetForInput(location, compIdx, vertexIdx, insertPos); - if (m_pipelineState->isGsOnChip() || m_gfxIp.major >= 9) // ES -> GS ring is always on-chip on GFX9 - { - Value *idxs[] = {ConstantInt::get(Type::getInt32Ty(*m_context), 0), ringOffset}; - auto ldsType = m_lds->getValueType(); - auto *loadPtr = GetElementPtrInst::Create(ldsType, m_lds, idxs, "", insertPos); - auto loadInst = - new LoadInst(loadPtr->getResultElementType(), loadPtr, "", false, m_lds->getAlign().value(), insertPos); - loadValue = loadInst; - - if (bitWidth == 8) - loadValue = new TruncInst(loadValue, Type::getInt8Ty(*m_context), "", insertPos); - else if (bitWidth == 16) - loadValue = new TruncInst(loadValue, Type::getInt16Ty(*m_context), "", insertPos); - - if (loadTy->isFloatingPointTy()) - loadValue = new BitCastInst(loadValue, loadTy, "", insertPos); - } else { - Value *esGsRingBufDesc = m_pipelineSysValues.get(m_entryPoint)->getEsGsRingBufDesc(); - CoherentFlag coherent = {}; - coherent.bits.glc = true; - coherent.bits.slc = true; - Value *args[] = { - esGsRingBufDesc, // rsrc - ringOffset, // offset - ConstantInt::get(Type::getInt32Ty(*m_context), 0), // soffset - ConstantInt::get(Type::getInt32Ty(*m_context), coherent.u32All) // glc slc - 
}; - loadValue = emitCall("llvm.amdgcn.raw.buffer.load.f32", Type::getFloatTy(*m_context), args, {}, insertPos); - - if (bitWidth == 8) { - assert(loadTy->isIntegerTy()); - - loadValue = new BitCastInst(loadValue, Type::getInt32Ty(*m_context), "", insertPos); - loadValue = new TruncInst(loadValue, Type::getInt8Ty(*m_context), "", insertPos); - } else if (bitWidth == 16) { - loadValue = new BitCastInst(loadValue, Type::getInt32Ty(*m_context), "", insertPos); - loadValue = new TruncInst(loadValue, Type::getInt16Ty(*m_context), "", insertPos); - - if (loadTy->isFloatingPointTy()) - loadValue = new BitCastInst(loadValue, loadTy, "", insertPos); - } else { - assert(bitWidth == 32); - if (loadTy->isIntegerTy()) - loadValue = new BitCastInst(loadValue, loadTy, "", insertPos); - } - } + Value *ringOffset = calcEsGsRingOffsetForInput(location, compIdx, vertexIdx, builder); + // ES -> GS ring is always on-chip on GFX10+ + Value *idxs[] = {builder.getInt32(0), ringOffset}; + auto ldsType = m_lds->getValueType(); + auto *loadPtr = builder.CreateGEP(ldsType, m_lds, idxs); + loadValue = builder.CreateAlignedLoad(loadTy, loadPtr, m_lds->getAlign().value()); } return loadValue; @@ -4025,9 +3938,7 @@ void PatchInOutImportExport::storeValueToGsVsRing(Value *storeValue, unsigned lo if (m_pipelineState->getNggControl()->enableNgg) { // NOTE: For NGG, writing GS output to GS-VS ring is represented by a call and the call is replaced with // real instructions when when NGG primitive shader is generated. 
- Value *args[] = {ConstantInt::get(Type::getInt32Ty(*m_context), location), - ConstantInt::get(Type::getInt32Ty(*m_context), compIdx), - ConstantInt::get(Type::getInt32Ty(*m_context), streamId), storeValue}; + Value *args[] = {builder.getInt32(location), builder.getInt32(compIdx), builder.getInt32(streamId), storeValue}; std::string callName = lgcName::NggWriteGsOutput + getTypeName(storeTy); builder.CreateNamedCall(callName, Type::getVoidTy(*m_context), args, {}); return; @@ -4120,31 +4031,22 @@ void PatchInOutImportExport::storeValueToGsVsRing(Value *storeValue, unsigned lo // @param location : Output location // @param compIdx : Output component index // @param esGsOffset : ES-GS ring offset in bytes -// @param insertPos : Where to insert the instruction +// @param builder : the builder to use Value *PatchInOutImportExport::calcEsGsRingOffsetForOutput(unsigned location, unsigned compIdx, Value *esGsOffset, - Instruction *insertPos) { - Value *ringOffset = nullptr; - if (m_pipelineState->isGsOnChip() || m_gfxIp.major >= 9) // ES -> GS ring is always on-chip on GFX9 - { - // ringOffset = esGsOffset + threadId * esGsRingItemSize + location * 4 + compIdx + BuilderBase &builder) { + // ES -> GS ring is always on-chip on GFX10+ + // ringOffset = esGsOffset + threadId * esGsRingItemSize + location * 4 + compIdx - assert(m_pipelineState->hasShaderStage(ShaderStage::Geometry)); - const auto &calcFactor = m_pipelineState->getShaderResourceUsage(ShaderStage::Geometry)->inOutUsage.gs.calcFactor; + assert(m_pipelineState->hasShaderStage(ShaderStage::Geometry)); + const auto &calcFactor = m_pipelineState->getShaderResourceUsage(ShaderStage::Geometry)->inOutUsage.gs.calcFactor; - esGsOffset = - BinaryOperator::CreateLShr(esGsOffset, ConstantInt::get(Type::getInt32Ty(*m_context), 2), "", insertPos); + esGsOffset = builder.CreateLShr(esGsOffset, builder.getInt32(2)); - ringOffset = BinaryOperator::CreateMul( - m_threadId, ConstantInt::get(Type::getInt32Ty(*m_context), 
calcFactor.esGsRingItemSize), "", insertPos); + Value *ringOffset = builder.CreateMul(m_threadId, builder.getInt32(calcFactor.esGsRingItemSize)); - ringOffset = BinaryOperator::CreateAdd(ringOffset, esGsOffset, "", insertPos); + ringOffset = builder.CreateAdd(ringOffset, esGsOffset); - ringOffset = BinaryOperator::CreateAdd( - ringOffset, ConstantInt::get(Type::getInt32Ty(*m_context), (location * 4 + compIdx)), "", insertPos); - } else { - // ringOffset = (location * 4 + compIdx) * 4 - ringOffset = ConstantInt::get(Type::getInt32Ty(*m_context), (location * 4 + compIdx) * 4); - } + ringOffset = builder.CreateAdd(ringOffset, builder.getInt32(location * 4 + compIdx)); return ringOffset; } @@ -4154,30 +4056,16 @@ Value *PatchInOutImportExport::calcEsGsRingOffsetForOutput(unsigned location, un // @param location : Input location // @param compIdx : Input Component index // @param vertexIdx : Vertex index -// @param insertPos : Where to insert the instruction +// @param builder : the builder to use Value *PatchInOutImportExport::calcEsGsRingOffsetForInput(unsigned location, unsigned compIdx, Value *vertexIdx, - Instruction *insertPos) { - Value *ringOffset = nullptr; + BuilderBase &builder) { auto esGsOffsets = m_pipelineSysValues.get(m_entryPoint)->getEsGsOffsets(); - if (m_pipelineState->isGsOnChip() || m_gfxIp.major >= 9) // ES -> GS ring is always on-chip on GFX9 - { - Value *vertexOffset = ExtractElementInst::Create(esGsOffsets, vertexIdx, "", insertPos); - - // ringOffset = vertexOffset[N] + (location * 4 + compIdx); - ringOffset = BinaryOperator::CreateAdd( - vertexOffset, ConstantInt::get(Type::getInt32Ty(*m_context), (location * 4 + compIdx)), "", insertPos); - } else { - Value *vertexOffset = ExtractElementInst::Create(esGsOffsets, vertexIdx, "", insertPos); - - // ringOffset = vertexOffset[N] * 4 + (location * 4 + compIdx) * 64 * 4; - ringOffset = - BinaryOperator::CreateMul(vertexOffset, ConstantInt::get(Type::getInt32Ty(*m_context), 4), "", insertPos); - - 
ringOffset = BinaryOperator::CreateAdd( - ringOffset, ConstantInt::get(Type::getInt32Ty(*m_context), (location * 4 + compIdx) * 64 * 4), "", insertPos); - } + // ES -> GS ring is always on-chip on GFX10+ + Value *vertexOffset = builder.CreateExtractElement(esGsOffsets, vertexIdx); + // ringOffset = vertexOffset[N] + (location * 4 + compIdx); + Value *ringOffset = builder.CreateAdd(vertexOffset, builder.getInt32(location * 4 + compIdx)); return ringOffset; } @@ -4308,9 +4196,8 @@ Value *PatchInOutImportExport::readValueFromLds(bool offChip, Type *readTy, Valu // Construct , , or vector from load values (dwords) Value *castValue = nullptr; if (numChannels > 1) { - auto intTy = bitWidth == 32 || bitWidth == 64 - ? Type::getInt32Ty(*m_context) - : (bitWidth == 16 ? Type::getInt16Ty(*m_context) : Type::getInt8Ty(*m_context)); + auto intTy = bitWidth == 32 || bitWidth == 64 ? builder.getInt32Ty() + : (bitWidth == 16 ? builder.getInt16Ty() : builder.getInt8Ty()); auto castTy = FixedVectorType::get(intTy, numChannels); castValue = PoisonValue::get(castTy); @@ -4343,9 +4230,8 @@ void PatchInOutImportExport::writeValueToLds(bool offChip, Value *writeValue, Va const unsigned numChannels = compCout * (bitWidth == 64 ? 2 : 1); // Cast write value to vector - Type *intTy = bitWidth == 32 || bitWidth == 64 - ? Type::getInt32Ty(*m_context) - : (bitWidth == 16 ? Type::getInt16Ty(*m_context) : Type::getInt8Ty(*m_context)); + Type *intTy = bitWidth == 32 || bitWidth == 64 ? builder.getInt32Ty() + : (bitWidth == 16 ? builder.getInt16Ty() : builder.getInt8Ty()); Type *castTy = numChannels > 1 ? cast(FixedVectorType::get(intTy, numChannels)) : intTy; Value *castValue = builder.CreateBitCast(writeValue, castTy); @@ -4680,7 +4566,7 @@ unsigned PatchInOutImportExport::calcPatchCountPerThreadGroup(unsigned inVertexC // NOTE: Performance analysis shows that 16 patches per thread group is an optimal upper-bound. The value is only // an experimental number. For GFX9. 
64 is an optimal number instead. - const unsigned optimalPatchCountPerThreadGroup = m_gfxIp.major >= 9 ? 64 : 16; + const unsigned optimalPatchCountPerThreadGroup = 64; patchCountPerThreadGroup = std::min(patchCountPerThreadGroup, optimalPatchCountPerThreadGroup); @@ -4720,9 +4606,9 @@ unsigned PatchInOutImportExport::calcPatchCountPerThreadGroup(unsigned inVertexC // @param output : Output value // @param location : Location of the output // @param compIdx : Index used for vector element indexing -// @param insertPos : Where to insert the "exp" instruction +// @param builder : the builder to use void PatchInOutImportExport::addExportInstForGenericOutput(Value *output, unsigned location, unsigned compIdx, - Instruction *insertPos) { + BuilderBase &builder) { // Check if the shader stage is valid to use "exp" instruction to export output const auto nextStage = m_pipelineState->getNextShaderStage(m_shaderStage); const bool useExpInst = ((m_shaderStage == ShaderStage::Vertex || m_shaderStage == ShaderStage::TessEval || @@ -4741,35 +4627,34 @@ void PatchInOutImportExport::addExportInstForGenericOutput(Value *output, unsign Value *exportInst = nullptr; const unsigned numChannels = bitWidth == 64 ? compCount * 2 : compCount; unsigned startChannel = bitWidth == 64 ? compIdx * 2 : compIdx; - Type *exportTy = - numChannels > 1 ? FixedVectorType::get(Type::getFloatTy(*m_context), numChannels) : Type::getFloatTy(*m_context); + Type *exportTy = numChannels > 1 ? FixedVectorType::get(builder.getFloatTy(), numChannels) : builder.getFloatTy(); if (outputTy != exportTy) { if (bitWidth == 8) { // NOTE: For 16-bit output export, we have to cast the 8-bit value to 32-bit floating-point value. assert(outputTy->isIntOrIntVectorTy()); - Type *zExtTy = Type::getInt32Ty(*m_context); + Type *zExtTy = builder.getInt32Ty(); zExtTy = outputTy->isVectorTy() ? 
cast(FixedVectorType::get(zExtTy, compCount)) : zExtTy; - exportInst = new ZExtInst(output, zExtTy, "", insertPos); - exportInst = new BitCastInst(exportInst, exportTy, "", insertPos); + exportInst = builder.CreateZExt(output, zExtTy); + exportInst = builder.CreateBitCast(exportInst, exportTy); } else if (bitWidth == 16) { // NOTE: For 16-bit output export, we have to cast the 16-bit value to 32-bit floating-point value. if (outputTy->isFPOrFPVectorTy()) { - Type *bitCastTy = Type::getInt16Ty(*m_context); + Type *bitCastTy = builder.getInt16Ty(); bitCastTy = outputTy->isVectorTy() ? cast(FixedVectorType::get(bitCastTy, compCount)) : bitCastTy; - exportInst = new BitCastInst(output, bitCastTy, "", insertPos); + exportInst = builder.CreateBitCast(output, bitCastTy); } else { assert(outputTy->isIntOrIntVectorTy()); exportInst = output; } - Type *zExtTy = Type::getInt32Ty(*m_context); + Type *zExtTy = builder.getInt32Ty(); zExtTy = outputTy->isVectorTy() ? cast(FixedVectorType::get(zExtTy, compCount)) : zExtTy; - exportInst = new ZExtInst(exportInst, zExtTy, "", insertPos); - exportInst = new BitCastInst(exportInst, exportTy, "", insertPos); + exportInst = builder.CreateZExt(exportInst, zExtTy); + exportInst = builder.CreateBitCast(exportInst, exportTy); } else { assert(canBitCast(outputTy, exportTy)); - exportInst = new BitCastInst(output, exportTy, "", insertPos); + exportInst = builder.CreateBitCast(output, exportTy); } } else exportInst = output; @@ -4781,12 +4666,11 @@ void PatchInOutImportExport::addExportInstForGenericOutput(Value *output, unsign exportValues[0] = exportInst; else { for (unsigned i = 0; i < numChannels; ++i) { - exportValues[i] = - ExtractElementInst::Create(exportInst, ConstantInt::get(Type::getInt32Ty(*m_context), i), "", insertPos); + exportValues[i] = builder.CreateExtractElement(exportInst, i); } } - auto poison = PoisonValue::get(Type::getFloatTy(*m_context)); + auto poison = PoisonValue::get(builder.getFloatTy()); if (numChannels <= 4) { 
assert(startChannel + numChannels <= 4); @@ -4818,68 +4702,67 @@ void PatchInOutImportExport::addExportInstForGenericOutput(Value *output, unsign // // @param output : Output value // @param builtInId : ID of the built-in variable -// @param insertPos : Where to insert the "exp" instruction -void PatchInOutImportExport::addExportInstForBuiltInOutput(Value *output, unsigned builtInId, Instruction *insertPos) { - const auto poison = PoisonValue::get(Type::getFloatTy(*m_context)); +// @param builder : the builder to use +void PatchInOutImportExport::addExportInstForBuiltInOutput(Value *output, unsigned builtInId, BuilderBase &builder) { + const auto poison = PoisonValue::get(builder.getFloatTy()); switch (builtInId) { case BuiltInPosition: { Value *args[] = { - ConstantInt::get(Type::getInt32Ty(*m_context), EXP_TARGET_POS_0), // tgt - ConstantInt::get(Type::getInt32Ty(*m_context), 0xF), // en + builder.getInt32(EXP_TARGET_POS_0), // tgt + builder.getInt32(0xF), // en nullptr, nullptr, nullptr, nullptr, - ConstantInt::get(Type::getInt1Ty(*m_context), false), // done - ConstantInt::get(Type::getInt1Ty(*m_context), false) // vm + builder.getInt1(false), // done + builder.getInt1(false) // vm }; // src0 ~ src3 for (unsigned i = 0; i < 4; ++i) { - auto compValue = - ExtractElementInst::Create(output, ConstantInt::get(Type::getInt32Ty(*m_context), i), "", insertPos); + auto compValue = builder.CreateExtractElement(output, builder.getInt32(i)); args[2 + i] = compValue; } - emitCall("llvm.amdgcn.exp.f32", Type::getVoidTy(*m_context), args, {}, insertPos); + builder.CreateIntrinsic(builder.getVoidTy(), Intrinsic::amdgcn_exp, args); break; } case BuiltInPointSize: { Value *args[] = { - ConstantInt::get(Type::getInt32Ty(*m_context), EXP_TARGET_POS_1), // tgt - ConstantInt::get(Type::getInt32Ty(*m_context), 0x1), // en - output, // src0 - poison, // src1 - poison, // src2 - poison, // src3 - ConstantInt::get(Type::getInt1Ty(*m_context), false), // done - 
ConstantInt::get(Type::getInt1Ty(*m_context), false) // vm + builder.getInt32(EXP_TARGET_POS_1), // tgt + builder.getInt32(0x1), // en + output, // src0 + poison, // src1 + poison, // src2 + poison, // src3 + builder.getInt1(false), // done + builder.getInt1(false) // vm }; - emitCall("llvm.amdgcn.exp.f32", Type::getVoidTy(*m_context), args, {}, insertPos); + builder.CreateIntrinsic(builder.getVoidTy(), Intrinsic::amdgcn_exp, args); break; } case BuiltInPrimitiveShadingRate: { // gl_PrimitiveShadingRate is not supported on pre-GFX10.3 assert(m_gfxIp >= GfxIpVersion({10, 3})); - exportShadingRate(output, insertPos); + exportShadingRate(output, builder); break; } case BuiltInEdgeFlag: { - Value *edgeflag = new BitCastInst(output, Type::getFloatTy(*m_context), "", insertPos); + Value *edgeflag = builder.CreateBitCast(output, builder.getFloatTy()); Value *args[] = { - ConstantInt::get(Type::getInt32Ty(*m_context), EXP_TARGET_POS_1), // tgt - ConstantInt::get(Type::getInt32Ty(*m_context), 0x2), // en - PoisonValue::get(Type::getFloatTy(*m_context)), // src1 - edgeflag, // src0 - PoisonValue::get(Type::getFloatTy(*m_context)), // src2 - PoisonValue::get(Type::getFloatTy(*m_context)), // src3 - ConstantInt::get(Type::getInt1Ty(*m_context), false), // done - ConstantInt::get(Type::getInt1Ty(*m_context), false) // vm + builder.getInt32(EXP_TARGET_POS_1), // tgt + builder.getInt32(0x2), // en + PoisonValue::get(builder.getFloatTy()), // src1 + edgeflag, // src0 + PoisonValue::get(builder.getFloatTy()), // src2 + PoisonValue::get(builder.getFloatTy()), // src3 + builder.getInt1(false), // done + builder.getInt1(false) // vm }; - emitCall("llvm.amdgcn.exp.f32", Type::getVoidTy(*m_context), args, {}, insertPos); + builder.CreateIntrinsic(builder.getVoidTy(), Intrinsic::amdgcn_exp, args); break; } default: { @@ -4986,13 +4869,11 @@ SwizzleWorkgroupLayout PatchInOutImportExport::calculateWorkgroupLayout() { // @param workgroupSizeZ : WorkgroupSize Z for thread Id numbers // 
@param isHwLocalInvocationId : identify whether the localInvocationId is builtInLocalInvcocationId or // BuiltInUnswizzledLocalInvocationId -// @param insertPos : Where to insert instructions. +// @param builder : the builder to use Value *PatchInOutImportExport::reconfigWorkgroupLayout(Value *localInvocationId, WorkgroupLayout macroLayout, WorkgroupLayout microLayout, unsigned workgroupSizeX, unsigned workgroupSizeY, unsigned workgroupSizeZ, - bool isHwLocalInvocationId, llvm::Instruction *insertPos) { - BuilderBase builder(*m_context); - builder.SetInsertPoint(insertPos); + bool isHwLocalInvocationId, BuilderBase &builder) { Value *apiX = builder.getInt32(0); Value *apiY = builder.getInt32(0); Value *newLocalInvocationId = PoisonValue::get(localInvocationId->getType()); @@ -5144,7 +5025,7 @@ void PatchInOutImportExport::createSwizzleThreadGroupFunction() { BuilderBase builder(*m_context); - Type *ivec3Ty = FixedVectorType::get(Type::getInt32Ty(*m_context), 3); + Type *ivec3Ty = FixedVectorType::get(builder.getInt32Ty(), 3); auto func = m_module->getFunction(lgcName::SwizzleWorkgroupId); @@ -5304,14 +5185,14 @@ void PatchInOutImportExport::createSwizzleThreadGroupFunction() { // Helper to compact bits for Z-order curve auto createCompact1By1Bits = [&](unsigned bitsToExtract, Value *src) { auto createCompactShift = [&](unsigned shift, unsigned mask, Value *src) { - auto result = builder.CreateLShr(src, ConstantInt::get(Type::getInt32Ty(*m_context), shift)); + auto result = builder.CreateLShr(src, builder.getInt32(shift)); result = builder.CreateOr(result, src); - result = builder.CreateAnd(result, ConstantInt::get(Type::getInt32Ty(*m_context), mask)); + result = builder.CreateAnd(result, builder.getInt32(mask)); return result; }; // x &= 0x55555555; // x = -f-e -d-c -b-a -9-8 -7-6 -5-4 -3-2 -1-0 - auto result = builder.CreateAnd(src, ConstantInt::get(Type::getInt32Ty(*m_context), 0x55555555)); + auto result = builder.CreateAnd(src, builder.getInt32(0x55555555)); // 
x = (x | (x >> 1)) & 0x33333333; // x = --fe --dc --ba --98 --76 --54 --32 --10 // NOLINT result = createCompactShift(1, 0x33333333, result); @@ -5393,11 +5274,8 @@ void PatchInOutImportExport::createSwizzleThreadGroupFunction() { // Exports HW shading rate, extracting the values from LGC shading rate (a mask of ShadingRateFlags) // // @param shadingRate : LGC shading rate -// @param insertPos : Where to insert instructions. -void PatchInOutImportExport::exportShadingRate(Value *shadingRate, Instruction *insertPos) { - BuilderBase builder(*m_context); - builder.SetInsertPoint(insertPos); - +// @param builder : the builder to use +void PatchInOutImportExport::exportShadingRate(Value *shadingRate, BuilderBase &builder) { assert(m_gfxIp >= GfxIpVersion({10, 3})); // Must be GFX10.3+ Value *hwShadingRate = nullptr; @@ -5476,11 +5354,8 @@ void PatchInOutImportExport::exportShadingRate(Value *shadingRate, Instruction * // ===================================================================================================================== // Gets HW shading rate and converts them to LGC definitions. // -// @param insertPos : Where to insert instructions. -Value *PatchInOutImportExport::getShadingRate(Instruction *insertPos) { - BuilderBase builder(*m_context); - builder.SetInsertPoint(insertPos); - +// @param builder : the builder to use +Value *PatchInOutImportExport::getShadingRate(BuilderBase &builder) { assert(m_gfxIp >= GfxIpVersion({10, 3})); // Must be GFX10.3+ assert(m_shaderStage == ShaderStage::Fragment); @@ -5574,8 +5449,8 @@ void PatchInOutImportExport::recordVertexAttribExport(unsigned location, ArrayRe // ===================================================================================================================== // Exports vertex attributes that were recorded previously // -// @param insertPos : Where to insert instructions. 
-void PatchInOutImportExport::exportVertexAttribs(Instruction *insertPos) { +// @param builder : the builder to use +void PatchInOutImportExport::exportVertexAttribs(BuilderBase &builder) { assert(m_shaderStage == ShaderStage::Vertex || m_shaderStage == ShaderStage::TessEval || m_shaderStage == ShaderStage::CopyShader); // Valid shader stages if (m_attribExports.empty()) { @@ -5583,9 +5458,6 @@ void PatchInOutImportExport::exportVertexAttribs(Instruction *insertPos) { return; } - BuilderBase builder(*m_context); - builder.SetInsertPoint(insertPos); - for (auto &attribExport : m_attribExports) { if (m_gfxIp.major <= 10) { unsigned channelMask = 0; @@ -5611,10 +5483,10 @@ void PatchInOutImportExport::exportVertexAttribs(Instruction *insertPos) { // NOTE: For GFX11+, vertex attributes are exported through memory. This call will be expanded when NGG primitive // shader is generated. The arguments are: buffer descriptor of attribute ring, attribute location, and attribute // export value. - emitCall(lgcName::NggAttribExport, builder.getVoidTy(), - {m_pipelineSysValues.get(m_entryPoint)->getAttribRingBufDesc(), builder.getInt32(attribExport.first), - attribValue}, - {}, insertPos); + builder.CreateNamedCall(lgcName::NggAttribExport, builder.getVoidTy(), + {m_pipelineSysValues.get(m_entryPoint)->getAttribRingBufDesc(), + builder.getInt32(attribExport.first), attribValue}, + {}); } } } diff --git a/lgc/patch/PatchPreparePipelineAbi.cpp b/lgc/patch/PatchPreparePipelineAbi.cpp index b19105e429..479a5f871d 100644 --- a/lgc/patch/PatchPreparePipelineAbi.cpp +++ b/lgc/patch/PatchPreparePipelineAbi.cpp @@ -91,8 +91,7 @@ PreservedAnalyses PatchPreparePipelineAbi::run(Module &module, ModuleAnalysisMan if (auto hsEntryPoint = m_pipelineShaders->getEntryPoint(ShaderStage::TessControl)) storeTessFactors(hsEntryPoint); - if (m_gfxIp.major >= 9) - mergeShader(module); + mergeShader(module); setAbiEntryNames(module); @@ -230,14 +229,14 @@ void 
PatchPreparePipelineAbi::writeTessFactors(PipelineState *pipelineState, Val if (primitiveMode == PrimitiveMode::Isolines) { assert(numOuterTfs == 2 && numInnerTfs == 0); - builder.CreateIntrinsic(Intrinsic::amdgcn_raw_tbuffer_store, outerTf->getType(), - {outerTf, // vdata - tfBufferDesc, // rsrc - tfBufferOffset, // voffset - tfBufferBase, // soffset - builder.getInt32(bufferFormatX2), // format - builder.getInt32(coherent.u32All)}); // glc - + auto callInst = builder.CreateIntrinsic(Intrinsic::amdgcn_raw_tbuffer_store, outerTf->getType(), + {outerTf, // vdata + tfBufferDesc, // rsrc + tfBufferOffset, // voffset + tfBufferBase, // soffset + builder.getInt32(bufferFormatX2), // format + builder.getInt32(coherent.u32All)}); // glc + (void)callInst; } else if (primitiveMode == PrimitiveMode::Triangles) { assert(numOuterTfs == 3 && numInnerTfs == 1); @@ -246,33 +245,35 @@ void PatchPreparePipelineAbi::writeTessFactors(PipelineState *pipelineState, Val tessFactor = builder.CreateInsertElement(tessFactor, builder.CreateExtractElement(innerTf, static_cast(0)), 3); - builder.CreateIntrinsic(Intrinsic::amdgcn_raw_tbuffer_store, tessFactor->getType(), - {tessFactor, // vdata - tfBufferDesc, // rsrc - tfBufferOffset, // voffset - tfBufferBase, // soffset - builder.getInt32(bufferFormatX4), // format - builder.getInt32(coherent.u32All)}); // glc + auto callInst = builder.CreateIntrinsic(Intrinsic::amdgcn_raw_tbuffer_store, tessFactor->getType(), + {tessFactor, // vdata + tfBufferDesc, // rsrc + tfBufferOffset, // voffset + tfBufferBase, // soffset + builder.getInt32(bufferFormatX4), // format + builder.getInt32(coherent.u32All)}); // glc + (void)callInst; } else { assert(primitiveMode == PrimitiveMode::Quads); assert(numOuterTfs == 4 && numInnerTfs == 2); - builder.CreateIntrinsic(Intrinsic::amdgcn_raw_tbuffer_store, outerTf->getType(), - {outerTf, // vdata - tfBufferDesc, // rsrc - tfBufferOffset, // voffset - tfBufferBase, // soffset - builder.getInt32(bufferFormatX4), // 
format - builder.getInt32(coherent.u32All)}); // glc + auto callInst = builder.CreateIntrinsic(Intrinsic::amdgcn_raw_tbuffer_store, outerTf->getType(), + {outerTf, // vdata + tfBufferDesc, // rsrc + tfBufferOffset, // voffset + tfBufferBase, // soffset + builder.getInt32(bufferFormatX4), // format + builder.getInt32(coherent.u32All)}); // glc tfBufferOffset = builder.CreateAdd(tfBufferOffset, builder.getInt32(4 * sizeof(float))); - builder.CreateIntrinsic(Intrinsic::amdgcn_raw_tbuffer_store, innerTf->getType(), - {innerTf, // vdata - tfBufferDesc, // rsrc - tfBufferOffset, // voffset - tfBufferBase, // soffset - builder.getInt32(bufferFormatX2), // format - builder.getInt32(coherent.u32All)}); // glc + callInst = builder.CreateIntrinsic(Intrinsic::amdgcn_raw_tbuffer_store, innerTf->getType(), + {innerTf, // vdata + tfBufferDesc, // rsrc + tfBufferOffset, // voffset + tfBufferBase, // soffset + builder.getInt32(bufferFormatX2), // format + builder.getInt32(coherent.u32All)}); // glc + (void)callInst; } } @@ -281,7 +282,7 @@ void PatchPreparePipelineAbi::writeTessFactors(PipelineState *pipelineState, Val // // @param module : LLVM module void PatchPreparePipelineAbi::mergeShader(Module &module) { - assert(m_gfxIp.major >= 9); + assert(m_gfxIp.major >= 10); const bool hasTs = (m_hasTcs || m_hasTes); diff --git a/lgc/patch/PatchResourceCollect.cpp b/lgc/patch/PatchResourceCollect.cpp index 85a67be8fc..fb8a9f5eca 100644 --- a/lgc/patch/PatchResourceCollect.cpp +++ b/lgc/patch/PatchResourceCollect.cpp @@ -479,6 +479,9 @@ bool PatchResourceCollect::checkGsOnChipValidity() { case PrimitiveType::TriangleStripAdjacency: inVertsPerPrim = 3; break; + case PrimitiveType::Patch: + inVertsPerPrim = 1; + break; default: llvm_unreachable("Unexpected primitive type!"); break; @@ -961,28 +964,25 @@ bool PatchResourceCollect::checkGsOnChipValidity() { LLPC_OUTS("\n"); } - if (gsOnChip || m_pipelineState->getTargetInfo().getGfxIpVersion().major >= 9) { - if 
(gsResUsage->inOutUsage.gs.calcFactor.rayQueryLdsStackSize > 0) { - LLPC_OUTS("Ray query LDS stack size (in dwords): " - << gsResUsage->inOutUsage.gs.calcFactor.rayQueryLdsStackSize - << " (start = " << gsResUsage->inOutUsage.gs.calcFactor.gsOnChipLdsSize << ")\n\n"); - } + if (gsResUsage->inOutUsage.gs.calcFactor.rayQueryLdsStackSize > 0) { + LLPC_OUTS("Ray query LDS stack size (in dwords): " + << gsResUsage->inOutUsage.gs.calcFactor.rayQueryLdsStackSize + << " (start = " << gsResUsage->inOutUsage.gs.calcFactor.gsOnChipLdsSize << ")\n\n"); + } - if (meshPipeline) { - LLPC_OUTS("GS primitive amplification factor: " << gsResUsage->inOutUsage.gs.calcFactor.primAmpFactor << "\n"); - LLPC_OUTS("\n"); - LLPC_OUTS("GS is on-chip (Mesh)\n"); - } else if (m_pipelineState->getNggControl()->enableNgg) { - LLPC_OUTS("GS primitive amplification factor: " << gsResUsage->inOutUsage.gs.calcFactor.primAmpFactor << "\n"); - LLPC_OUTS("GS enable max output vertices per instance: " - << (gsResUsage->inOutUsage.gs.calcFactor.enableMaxVertOut ? "true" : "false") << "\n"); - LLPC_OUTS("\n"); - LLPC_OUTS("GS is on-chip (NGG)\n"); - } else { - LLPC_OUTS("GS is " << (gsOnChip ? "on-chip" : "off-chip") << "\n"); - } - } else - LLPC_OUTS("GS is off-chip\n"); + if (meshPipeline) { + LLPC_OUTS("GS primitive amplification factor: " << gsResUsage->inOutUsage.gs.calcFactor.primAmpFactor << "\n"); + LLPC_OUTS("\n"); + LLPC_OUTS("GS is on-chip (Mesh)\n"); + } else if (m_pipelineState->getNggControl()->enableNgg) { + LLPC_OUTS("GS primitive amplification factor: " << gsResUsage->inOutUsage.gs.calcFactor.primAmpFactor << "\n"); + LLPC_OUTS("GS enable max output vertices per instance: " + << (gsResUsage->inOutUsage.gs.calcFactor.enableMaxVertOut ? "true" : "false") << "\n"); + LLPC_OUTS("\n"); + LLPC_OUTS("GS is on-chip (NGG)\n"); + } else { + LLPC_OUTS("GS is " << (gsOnChip ? 
"on-chip" : "off-chip") << "\n"); + } LLPC_OUTS("\n"); return gsOnChip; diff --git a/lgc/patch/RegisterMetadataBuilder.cpp b/lgc/patch/RegisterMetadataBuilder.cpp index f8b6558783..35e5dc7124 100644 --- a/lgc/patch/RegisterMetadataBuilder.cpp +++ b/lgc/patch/RegisterMetadataBuilder.cpp @@ -391,10 +391,7 @@ void RegisterMetadataBuilder::buildEsGsRegisters() { // GE_MAX_OUTPUT_PER_SUBGROUP and VGT_GS_MAX_PRIMS_PER_SUBGROUP const unsigned maxPrimsPerSubgroup = std::min(gsInstPrimsInSubgrp * maxVertOut, MaxGsThreadsPerSubgroup); - if (m_gfxIp.major == 9) - getGraphicsRegNode()[Util::Abi::GraphicsRegisterMetadataKey::MaxPrimsPerSubgroup] = maxPrimsPerSubgroup; - else - getGraphicsRegNode()[Util::Abi::GraphicsRegisterMetadataKey::MaxVertsPerSubgroup] = maxPrimsPerSubgroup; + getGraphicsRegNode()[Util::Abi::GraphicsRegisterMetadataKey::MaxVertsPerSubgroup] = maxPrimsPerSubgroup; // Set LDS_SIZE of SPI_SHADER_PGM_RSRC2_GS unsigned ldsSizeInDwords = calcFactor.gsOnChipLdsSize; @@ -545,6 +542,9 @@ void RegisterMetadataBuilder::buildPrimShaderRegisters() { case PrimitiveType::TriangleStripAdjacency: gsOutputPrimitiveType = TRISTRIP; break; + case PrimitiveType::Patch: + gsOutputPrimitiveType = POINTLIST; + break; default: llvm_unreachable("Should never be called!"); break; diff --git a/lgc/patch/ShaderMerger.cpp b/lgc/patch/ShaderMerger.cpp index 2411a9fb57..3633aff185 100644 --- a/lgc/patch/ShaderMerger.cpp +++ b/lgc/patch/ShaderMerger.cpp @@ -56,7 +56,7 @@ using namespace lgc; ShaderMerger::ShaderMerger(PipelineState *pipelineState, PipelineShadersResult *pipelineShaders) : m_pipelineState(pipelineState), m_context(&pipelineState->getContext()), m_gfxIp(pipelineState->getTargetInfo().getGfxIpVersion()) { - assert(m_gfxIp.major >= 9); + assert(m_gfxIp.major >= 10); assert(m_pipelineState->isGraphics()); m_hasVs = m_pipelineState->hasShaderStage(ShaderStage::Vertex); @@ -93,7 +93,7 @@ unsigned ShaderMerger::getSpecialSgprInputIndex(GfxIpVersion gfxIp, LsHs::Specia 
{LsHs::waveIdInGroup, 5}, // s5 }; - assert(gfxIp.major >= 9); // Must be GFX9+ + assert(gfxIp.major >= 10); // Must be GFX10+ if (gfxIp.major >= 11) { assert(LsHsSpecialSgprInputMapGfx11.count(sgprInput) > 0); @@ -145,7 +145,7 @@ unsigned ShaderMerger::getSpecialSgprInputIndex(GfxIpVersion gfxIp, EsGs::Specia {EsGs::FlatScratchHigh, 7}, // s7 }; - assert(gfxIp.major >= 9); // Must be GFX9+ + assert(gfxIp.major >= 10); // Must be GFX10+ if (gfxIp.major >= 11) { assert(EsGsSpecialSgprInputMapGfx11.count(sgprInput) > 0); @@ -309,7 +309,8 @@ Function *ShaderMerger::generateLsHsEntryPoint(Function *lsEntryPoint, Function // Create the entrypoint for the merged shader, and insert it at the start. This has to be done for unlinked shaders // because the vertex fetch shader will be prepended to this module and expect the fall through into the merged // shader. - Function *entryPoint = Function::Create(entryPointTy, GlobalValue::ExternalLinkage, lgcName::LsHsEntryPoint); + Function *entryPoint = createFunctionHelper(entryPointTy, GlobalValue::ExternalLinkage, hsEntryPoint->getParent(), + lgcName::LsHsEntryPoint); entryPoint->setDLLStorageClass(GlobalValue::DLLExportStorageClass); auto module = hsEntryPoint->getParent(); module->getFunctionList().push_front(entryPoint); @@ -638,7 +639,8 @@ Function *ShaderMerger::generateEsGsEntryPoint(Function *esEntryPoint, Function // Create the entrypoint for the merged shader, and insert it at the start. This has to be done for unlinked shaders // because the vertex fetch shader will be prepended to this module and expect the fall through into the merged // shader. 
- Function *entryPoint = Function::Create(entryPointTy, GlobalValue::ExternalLinkage, lgcName::EsGsEntryPoint); + Function *entryPoint = + createFunctionHelper(entryPointTy, GlobalValue::ExternalLinkage, module, lgcName::EsGsEntryPoint); entryPoint->setDLLStorageClass(GlobalValue::DLLExportStorageClass); module->getFunctionList().push_front(entryPoint); diff --git a/lgc/patch/VertexFetch.cpp b/lgc/patch/VertexFetch.cpp index 1c6c152758..7ef14969a8 100644 --- a/lgc/patch/VertexFetch.cpp +++ b/lgc/patch/VertexFetch.cpp @@ -126,7 +126,8 @@ class VertexFetchImpl : public VertexFetch { bool needPatch32(const VertexInputDescription *inputDesc) const; - bool needPackFormatEmulation(const VertexInputDescription *inputDesc, std::vector &extractMask) const; + bool needPackFormatEmulation(const VertexInputDescription *inputDesc, const VertexNumFormatInfo *numFormatInfo, + std::vector &extractMask, std::vector &normalizationFactors) const; void postFetchEmulation(const VertexInputDescription *description, bool fetchInByte, unsigned inputCompBytes, unsigned numChannels, const VertexNumFormatInfo *numFormatInfo, @@ -1383,7 +1384,9 @@ unsigned VertexFetchImpl::mapVertexFormat(unsigned dfmt, unsigned nfmt) const { // @param inputDesc : Vertex input description. // @param [out] extractMask : Bits extract mask. 
bool VertexFetchImpl::needPackFormatEmulation(const VertexInputDescription *inputDesc, - std::vector &extractMask) const { + const VertexNumFormatInfo *numFormatInfo, + std::vector &extractMask, + std::vector &normalizationFactors) const { switch (inputDesc->dfmt) { case BufDataFormat10_11_11: extractMask.push_back(11); @@ -1400,12 +1403,38 @@ bool VertexFetchImpl::needPackFormatEmulation(const VertexInputDescription *inpu extractMask.push_back(10); extractMask.push_back(10); extractMask.push_back(10); + if (numFormatInfo->isNorm) { + if (numFormatInfo->isSigned) { + normalizationFactors.push_back(1.0f); + normalizationFactors.push_back(1 / 512.0f); + normalizationFactors.push_back(1 / 512.0f); + normalizationFactors.push_back(1 / 512.0f); + } else { + normalizationFactors.push_back(1 / 3.0f); + normalizationFactors.push_back(1 / 1023.0f); + normalizationFactors.push_back(1 / 1023.0f); + normalizationFactors.push_back(1 / 1023.0f); + } + } return true; case BufDataFormat2_10_10_10: extractMask.push_back(10); extractMask.push_back(10); extractMask.push_back(10); extractMask.push_back(2); + if (numFormatInfo->isNorm) { + if (numFormatInfo->isSigned) { + normalizationFactors.push_back(1 / 512.0f); + normalizationFactors.push_back(1 / 512.0f); + normalizationFactors.push_back(1 / 512.0f); + normalizationFactors.push_back(1.0f); + } else { + normalizationFactors.push_back(1 / 1023.0f); + normalizationFactors.push_back(1 / 1023.0f); + normalizationFactors.push_back(1 / 1023.0f); + normalizationFactors.push_back(1 / 3.0f); + } + } return true; default: break; @@ -1480,8 +1509,10 @@ void VertexFetchImpl::postFetchEmulation(const VertexInputDescription *descripti // Do post-processing in certain cases std::vector shuffleMask; std::vector extractMask; + std::vector normalizationFactors; + bool isPacked = needPackFormatEmulation(description, numFormatInfo, extractMask, normalizationFactors); // Emulation for packed formats. 
- if (fetchInByte && needPackFormatEmulation(description, extractMask)) { + if (fetchInByte && isPacked) { // Must be 8 bit fetch in Byte. Value *packedVertex = PoisonValue::get(FixedVectorType::get(builderImpl.getInt8Ty(), descFormatInfo->vertexByteSize)); @@ -1512,7 +1543,8 @@ void VertexFetchImpl::postFetchEmulation(const VertexInputDescription *descripti assert(shuffleMask.empty() == false); *ppFetch = builderImpl.CreateShuffleVector(*ppFetch, *ppFetch, ConstantVector::get(shuffleMask)); } - if (fetchInByte || needPatch32(description)) { + bool isPatch32 = needPatch32(description); + if (fetchInByte || isPatch32) { Type *compFloatTy = getVertexFetchType(true, inputCompBytes, builderImpl); for (unsigned i = 0; i < numChannels; ++i) { Value *elemInstr = nullptr; @@ -1522,18 +1554,22 @@ void VertexFetchImpl::postFetchEmulation(const VertexInputDescription *descripti elemInstr = builderImpl.CreateExtractElement(*ppFetch, builderImpl.getInt32(i)); if (numFormatInfo->isNorm || numFormatInfo->isScaled) { // A constant divisor for normalization emulation. - float normDiv = 2.14748365e+09f; + float normDiv = isPatch32 ? 2.14748365e+09f : 32767.0f; if (numFormatInfo->isSigned) { // Signed int to float elemInstr = builderImpl.CreateSIToFP(elemInstr, compFloatTy); } else { // Unsigned int to float elemInstr = builderImpl.CreateUIToFP(elemInstr, compFloatTy); - normDiv = 4.29496730e+09f; + normDiv = isPatch32 ? 4.29496730e+09f : 65535.0f; } if (numFormatInfo->isNorm) { // Normalization emulation. - elemInstr = builderImpl.CreateFDiv(elemInstr, ConstantFP::get(compFloatTy, normDiv)); + if (isPacked) { + elemInstr = builderImpl.CreateFMul(elemInstr, ConstantFP::get(compFloatTy, normalizationFactors[i])); + } else { + elemInstr = builderImpl.CreateFDiv(elemInstr, ConstantFP::get(compFloatTy, normDiv)); + } } } else if (description->nfmt == BufNumFormatFixed) { // A constant divisor to translate loaded float bits to fixed point format. 
diff --git a/lgc/state/Compiler.cpp b/lgc/state/Compiler.cpp index 03bd2a948a..bc123a1914 100644 --- a/lgc/state/Compiler.cpp +++ b/lgc/state/Compiler.cpp @@ -28,7 +28,7 @@ * @brief LLPC source file: PipelineState methods that do IR linking and compilation *********************************************************************************************************************** */ -#include "continuations/Continuations.h" +#include "llvmraytracing/Continuations.h" #include "lgc/LgcContext.h" #include "lgc/PassManager.h" #include "lgc/patch/Patch.h" diff --git a/lgc/state/LgcContext.cpp b/lgc/state/LgcContext.cpp index 547e971dc3..b16d1046a5 100644 --- a/lgc/state/LgcContext.cpp +++ b/lgc/state/LgcContext.cpp @@ -203,8 +203,6 @@ std::string LgcContext::getGpuNameString(unsigned major, unsigned minor, unsigne gpuNameStream << "gfx" << major << minor; if (stepping >= 0xFFFA) gpuNameStream << char(stepping - 0xFFFA + 'A'); - else if (major == 9 && stepping >= 10) - gpuNameStream << char(stepping - 10 + 'a'); else gpuNameStream << stepping; diff --git a/lgc/state/PalMetadata.cpp b/lgc/state/PalMetadata.cpp index 8d7cf93794..3f467bf640 100644 --- a/lgc/state/PalMetadata.cpp +++ b/lgc/state/PalMetadata.cpp @@ -417,23 +417,7 @@ unsigned PalMetadata::getUserDataReg0(ShaderStageEnum stage) { m_userDataRegMapping[ShaderStage::Task] = mmCOMPUTE_USER_DATA_0; m_userDataRegMapping[ShaderStage::Mesh] = mmSPI_SHADER_USER_DATA_GS_0; - if (m_pipelineState->getTargetInfo().getGfxIpVersion().major == 9) { - // GFX9: Merged shaders, and merged ES-GS user data goes into ES registers. 
- m_userDataRegMapping[ShaderStage::CopyShader] = mmSPI_SHADER_USER_DATA_VS_0; - m_userDataRegMapping[ShaderStage::Geometry] = mmSPI_SHADER_USER_DATA_ES_0; - if (m_pipelineState->hasShaderStage(ShaderStage::Geometry)) - m_userDataRegMapping[ShaderStage::TessEval] = m_userDataRegMapping[ShaderStage::Geometry]; - else - m_userDataRegMapping[ShaderStage::TessEval] = mmSPI_SHADER_USER_DATA_VS_0; - m_userDataRegMapping[ShaderStage::TessControl] = mmSPI_SHADER_USER_DATA_HS_0; - if (m_pipelineState->hasShaderStage(ShaderStage::TessControl)) - m_userDataRegMapping[ShaderStage::Vertex] = m_userDataRegMapping[ShaderStage::TessControl]; - else if (m_pipelineState->hasShaderStage(ShaderStage::Geometry)) - m_userDataRegMapping[ShaderStage::Vertex] = m_userDataRegMapping[ShaderStage::Geometry]; - else - m_userDataRegMapping[ShaderStage::Vertex] = mmSPI_SHADER_USER_DATA_VS_0; - - } else if (!m_pipelineState->getNggControl()->enableNgg) { + if (!m_pipelineState->getNggControl()->enableNgg) { // GFX10+ not NGG: Same as GFX9, except ES-GS user data goes into GS registers. m_userDataRegMapping[ShaderStage::CopyShader] = mmSPI_SHADER_USER_DATA_VS_0; m_userDataRegMapping[ShaderStage::Geometry] = mmSPI_SHADER_USER_DATA_GS_0; @@ -480,8 +464,7 @@ void PalMetadata::setUserDataEntry(ShaderStageEnum stage, unsigned userDataIndex // Assert that the supplied user data index is not too big. 
bool inRange = userDataIndex + dwordCount <= 16; - if (m_pipelineState->getTargetInfo().getGfxIpVersion().major >= 9 && stage != ShaderStage::Compute && - stage != ShaderStage::Task) + if (stage != ShaderStage::Compute && stage != ShaderStage::Task) inRange = userDataIndex + dwordCount <= 32; assert(inRange && "Out of range user data index"); (void(inRange)); // Unused @@ -620,8 +603,7 @@ void PalMetadata::finalizeRegisterSettings(bool isWholePipeline) { if (m_pipelineState->useRegisterFieldFormat()) { auto graphicsRegNode = m_pipelineNode[Util::Abi::PipelineMetadataKey::GraphicsRegisters].getMap(true); - if (m_pipelineState->getTargetInfo().getGfxIpVersion().major >= 9 && - m_pipelineState->getColorExportState().alphaToCoverageEnable) { + if (m_pipelineState->getColorExportState().alphaToCoverageEnable) { auto dbShaderControl = graphicsRegNode[Util::Abi::GraphicsRegisterMetadataKey::DbShaderControl].getMap(true); dbShaderControl[Util::Abi::DbShaderControlMetadataKey::AlphaToMaskDisable] = false; } @@ -633,14 +615,12 @@ void PalMetadata::finalizeRegisterSettings(bool isWholePipeline) { static_cast(waveBreakSize); } - if (m_pipelineState->getTargetInfo().getGfxIpVersion() >= GfxIpVersion{9, 0, 0}) { - if (m_pipelineState->getRasterizerState().innerCoverage) - graphicsRegNode[Util::Abi::GraphicsRegisterMetadataKey::AaCoverageToShaderSelect] = - serializeEnum(Util::Abi::CoverageToShaderSel(INPUT_INNER_COVERAGE)); - else - graphicsRegNode[Util::Abi::GraphicsRegisterMetadataKey::AaCoverageToShaderSelect] = - serializeEnum(Util::Abi::CoverageToShaderSel(INPUT_COVERAGE)); - } + if (m_pipelineState->getRasterizerState().innerCoverage) + graphicsRegNode[Util::Abi::GraphicsRegisterMetadataKey::AaCoverageToShaderSelect] = + serializeEnum(Util::Abi::CoverageToShaderSel(INPUT_INNER_COVERAGE)); + else + graphicsRegNode[Util::Abi::GraphicsRegisterMetadataKey::AaCoverageToShaderSelect] = + serializeEnum(Util::Abi::CoverageToShaderSel(INPUT_COVERAGE)); } else { // Set 
PA_CL_CLIP_CNTL from pipeline state settings. // DX_CLIP_SPACE_DEF, ZCLIP_NEAR_DISABLE and ZCLIP_FAR_DISABLE are now set internally by PAL (as of @@ -651,8 +631,7 @@ void PalMetadata::finalizeRegisterSettings(bool isWholePipeline) { paClClipCntl.bits.DX_RASTERIZATION_KILL = rasterizerDiscardEnable; setRegister(mmPA_CL_CLIP_CNTL, paClClipCntl.u32All); - if (m_pipelineState->getTargetInfo().getGfxIpVersion().major >= 9 && - m_pipelineState->getColorExportState().alphaToCoverageEnable) { + if (m_pipelineState->getColorExportState().alphaToCoverageEnable) { DB_SHADER_CONTROL dbShaderControl = {}; dbShaderControl.u32All = getRegister(mmDB_SHADER_CONTROL); dbShaderControl.bitfields.ALPHA_TO_MASK_DISABLE = 0; @@ -666,15 +645,13 @@ void PalMetadata::finalizeRegisterSettings(bool isWholePipeline) { setRegister(mmPA_SC_SHADER_CONTROL, paScShaderControl.u32All); } - if (m_pipelineState->getTargetInfo().getGfxIpVersion() >= GfxIpVersion{9, 0, 0}) { - PA_SC_AA_CONFIG paScAaConfig = {}; - if (m_pipelineState->getRasterizerState().innerCoverage) { - paScAaConfig.bitfields.COVERAGE_TO_SHADER_SELECT = INPUT_INNER_COVERAGE; - } else { - paScAaConfig.bitfields.COVERAGE_TO_SHADER_SELECT = INPUT_COVERAGE; - } - setRegister(mmPA_SC_AA_CONFIG, paScAaConfig.u32All); + PA_SC_AA_CONFIG paScAaConfig = {}; + if (m_pipelineState->getRasterizerState().innerCoverage) { + paScAaConfig.bitfields.COVERAGE_TO_SHADER_SELECT = INPUT_INNER_COVERAGE; + } else { + paScAaConfig.bitfields.COVERAGE_TO_SHADER_SELECT = INPUT_COVERAGE; } + setRegister(mmPA_SC_AA_CONFIG, paScAaConfig.u32All); } } @@ -1019,19 +996,17 @@ void PalMetadata::updateCbShaderMask(unsigned cbShaderMask) { // Updates the DB shader control that depends on the CB state. 
// void PalMetadata::updateDbShaderControl() { - if (m_pipelineState->getTargetInfo().getGfxIpVersion().major >= 9) { - if (m_pipelineState->useRegisterFieldFormat()) { - auto dbShaderControl = m_pipelineNode[Util::Abi::PipelineMetadataKey::GraphicsRegisters] - .getMap(true)[Util::Abi::GraphicsRegisterMetadataKey::DbShaderControl] - .getMap(true); - dbShaderControl[Util::Abi::DbShaderControlMetadataKey::AlphaToMaskDisable] = - !m_pipelineState->getColorExportState().alphaToCoverageEnable; - } else { - DB_SHADER_CONTROL dbShaderControl = {}; - dbShaderControl.u32All = getRegister(mmDB_SHADER_CONTROL); - dbShaderControl.bitfields.ALPHA_TO_MASK_DISABLE = !m_pipelineState->getColorExportState().alphaToCoverageEnable; - setRegister(mmDB_SHADER_CONTROL, dbShaderControl.u32All); - } + if (m_pipelineState->useRegisterFieldFormat()) { + auto dbShaderControl = m_pipelineNode[Util::Abi::PipelineMetadataKey::GraphicsRegisters] + .getMap(true)[Util::Abi::GraphicsRegisterMetadataKey::DbShaderControl] + .getMap(true); + dbShaderControl[Util::Abi::DbShaderControlMetadataKey::AlphaToMaskDisable] = + !m_pipelineState->getColorExportState().alphaToCoverageEnable; + } else { + DB_SHADER_CONTROL dbShaderControl = {}; + dbShaderControl.u32All = getRegister(mmDB_SHADER_CONTROL); + dbShaderControl.bitfields.ALPHA_TO_MASK_DISABLE = !m_pipelineState->getColorExportState().alphaToCoverageEnable; + setRegister(mmDB_SHADER_CONTROL, dbShaderControl.u32All); } } @@ -1251,13 +1226,8 @@ unsigned PalMetadata::getFirstUserDataReg(unsigned callingConv) { {CallingConv::AMDGPU_LS, mmSPI_SHADER_USER_DATA_LS_0}, {CallingConv::AMDGPU_HS, mmSPI_SHADER_USER_DATA_HS_0}, {CallingConv::AMDGPU_ES, mmSPI_SHADER_USER_DATA_ES_0}, {CallingConv::AMDGPU_GS, mmSPI_SHADER_USER_DATA_GS_0}, {CallingConv::AMDGPU_VS, mmSPI_SHADER_USER_DATA_VS_0}, {CallingConv::AMDGPU_CS, mmCOMPUTE_PGM_RSRC1}}; - static const ArrayMap shaderTableGfx9 = { - {CallingConv::AMDGPU_LS, mmSPI_SHADER_USER_DATA_LS_0}, {CallingConv::AMDGPU_HS, 
mmSPI_SHADER_USER_DATA_HS_0}, - {CallingConv::AMDGPU_ES, mmSPI_SHADER_USER_DATA_ES_0}, {CallingConv::AMDGPU_GS, mmSPI_SHADER_USER_DATA_ES_0}, - {CallingConv::AMDGPU_VS, mmSPI_SHADER_USER_DATA_VS_0}, {CallingConv::AMDGPU_CS, mmCOMPUTE_USER_DATA_0}}; - bool isGfx9 = m_pipelineState->getTargetInfo().getGfxIpVersion().major == 9; - ArrayRef currentShaderTable(isGfx9 ? shaderTableGfx9 : shaderTable); + ArrayRef currentShaderTable(shaderTable); return findValueInArrayMap(currentShaderTable, callingConv); } @@ -1272,10 +1242,8 @@ unsigned PalMetadata::getNumberOfSgprsBeforeUserData(unsigned callingConv) { case CallingConv::AMDGPU_PS: return 0; default: - // GFX9+ merged shader have an extra 8 SGPRs before user data. - if (m_pipelineState->getTargetInfo().getGfxIpVersion() >= GfxIpVersion{9, 0, 0}) - return 8; - return 0; + // Merged shader have an extra 8 SGPRs before user data. + return 8; } } diff --git a/lgc/state/PipelineState.cpp b/lgc/state/PipelineState.cpp index 74820c0faa..e5895c0421 100644 --- a/lgc/state/PipelineState.cpp +++ b/lgc/state/PipelineState.cpp @@ -331,7 +331,7 @@ ComputeShaderMode Pipeline::getComputeShaderMode(Module &module) { // @param emitLgc : Whether the option -emit-lgc is on PipelineState::PipelineState(LgcContext *builderContext, bool emitLgc) : Pipeline(builderContext), m_emitLgc(emitLgc), m_meshRowExport(EnableRowExport) { - m_registerFieldFormat = getTargetInfo().getGfxIpVersion().major >= 9 && UseRegisterFieldFormat; + m_registerFieldFormat = UseRegisterFieldFormat; m_tessLevel.inner[0] = -1.0f; m_tessLevel.inner[1] = -1.0f; m_tessLevel.outer[0] = -1.0f; @@ -1354,11 +1354,7 @@ unsigned PipelineState::getShaderWaveSize(ShaderStageEnum stage) { if (!m_waveSize[stage]) setShaderDefaultWaveSize(stage); - if (getTargetInfo().getGfxIpVersion().major >= 9) { - return getMergedShaderWaveSize(stage); - } - - return m_waveSize[stage]; + return getMergedShaderWaveSize(stage); } // 
===================================================================================================================== @@ -1368,7 +1364,7 @@ unsigned PipelineState::getShaderWaveSize(ShaderStageEnum stage) { // // @param stage : Shader stage unsigned PipelineState::getMergedShaderWaveSize(ShaderStageEnum stage) { - assert(getTargetInfo().getGfxIpVersion().major >= 9); + assert(getTargetInfo().getGfxIpVersion().major >= 10); unsigned waveSize = m_waveSize[stage]; // NOTE: For GFX9+, two shaders are merged as a shader pair. The wave size is determined by the larger one. That is @@ -1880,6 +1876,8 @@ unsigned PipelineState::getVerticesPerPrimitive() { case lgc::PrimitiveType::TriangleListAdjacency: case lgc::PrimitiveType::TriangleStripAdjacency: return 3; + case lgc::PrimitiveType::Patch: + return 1; default: break; } diff --git a/lgc/state/ShaderStage.cpp b/lgc/state/ShaderStage.cpp index e97961b48d..20a733c79c 100644 --- a/lgc/state/ShaderStage.cpp +++ b/lgc/state/ShaderStage.cpp @@ -150,7 +150,7 @@ Function *lgc::addFunctionArgs(Function *oldFunc, Type *retTy, ArrayRef if (!retTy) retTy = oldFuncTy->getReturnType(); auto newFuncTy = FunctionType::get(retTy, allArgTys, false); - Function *newFunc = Function::Create(newFuncTy, oldFunc->getLinkage()); + Function *newFunc = createFunctionHelper(newFuncTy, oldFunc->getLinkage(), oldFunc->getParent()); newFunc->setCallingConv(oldFunc->getCallingConv()); newFunc->takeName(oldFunc); newFunc->setSubprogram(oldFunc->getSubprogram()); diff --git a/lgc/test/InOutPackingNonZeroBase.lgc b/lgc/test/InOutPackingNonZeroBase.lgc index eb2f673b07..a49f9c61d7 100644 --- a/lgc/test/InOutPackingNonZeroBase.lgc +++ b/lgc/test/InOutPackingNonZeroBase.lgc @@ -41,7 +41,7 @@ target triple = "amdgcn--amdpal" define dllexport spir_func void @lgc.shader.VS.main() local_unnamed_addr #0 !spirv.ExecutionModel !9 !lgc.shaderstage !10 { ; IR-LABEL: @lgc.shader.VS.main( ; IR-NEXT: .entry: -; IR-NEXT: [[TMP0:%.*]] = call <3 x float> 
@lgc.input.import.generic.v3f32(i1 false, i32 1, i32 0, i32 0, i32 poison) +; IR-NEXT: [[TMP0:%.*]] = call <3 x float> @lgc.input.import.generic__v3f32(i1 false, i32 1, i32 0, i32 0, i32 poison) ; IR-NEXT: [[TMP1:%.*]] = extractelement <3 x float> [[TMP0]], i64 0 ; IR-NEXT: [[TMP2:%.*]] = extractelement <3 x float> [[TMP0]], i64 1 ; IR-NEXT: [[TMP3:%.*]] = extractelement <3 x float> [[TMP0]], i64 2 @@ -58,7 +58,7 @@ define dllexport spir_func void @lgc.shader.VS.main() local_unnamed_addr #0 !spi ; IR-NEXT: ret void ; .entry: - %0 = call <3 x float> (...) @lgc.create.read.generic.input.v3f32(i32 1, i32 0, i32 0, i32 0, i32 0, i32 poison) + %0 = call <3 x float> (...) @lgc.create.read.generic.input__v3f32(i32 1, i32 0, i32 0, i32 0, i32 0, i32 poison) %1 = extractelement <3 x float> %0, i64 0 %2 = extractelement <3 x float> %0, i64 1 @@ -72,7 +72,7 @@ define dllexport spir_func void @lgc.shader.VS.main() local_unnamed_addr #0 !spi } ; Function Attrs: nounwind {{readonly willreturn|willreturn memory\(read\)}} -declare <3 x float> @lgc.create.read.generic.input.v3f32(...) local_unnamed_addr #1 +declare <3 x float> @lgc.create.read.generic.input__v3f32(...) local_unnamed_addr #1 ; Function Attrs: nounwind declare void @lgc.create.write.generic.output(...) local_unnamed_addr #0 @@ -82,25 +82,25 @@ define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !spi ; IR-LABEL: @lgc.shader.FS.main( ; IR-NEXT: .entry: ; IR-NEXT: [[INTERPPERSPCENTER:%.*]] = call <2 x float> @lgc.input.import.builtin.InterpPerspCenter.v2f32.i32(i32 268435457) #[[ATTR4:[0-9]+]] -; IR-NEXT: [[TMP0:%.*]] = call float (...) @lgc.input.import.interpolated.f32(i1 false, i32 9, i32 0, i32 0, i32 poison, i32 0, <2 x float> [[INTERPPERSPCENTER]]) +; IR-NEXT: [[TMP0:%.*]] = call float (...) 
@lgc.input.import.interpolated__f32(i1 false, i32 9, i32 0, i32 0, i32 poison, i32 0, <2 x float> [[INTERPPERSPCENTER]]) ; IR-NEXT: [[INTERPPERSPCENTER1:%.*]] = call <2 x float> @lgc.input.import.builtin.InterpPerspCenter.v2f32.i32(i32 268435457) #[[ATTR4]] -; IR-NEXT: [[TMP1:%.*]] = call float (...) @lgc.input.import.interpolated.f32(i1 false, i32 8, i32 0, i32 0, i32 poison, i32 0, <2 x float> [[INTERPPERSPCENTER1]]) +; IR-NEXT: [[TMP1:%.*]] = call float (...) @lgc.input.import.interpolated__f32(i1 false, i32 8, i32 0, i32 0, i32 poison, i32 0, <2 x float> [[INTERPPERSPCENTER1]]) ; IR-NEXT: [[INTERPPERSPCENTER2:%.*]] = call <2 x float> @lgc.input.import.builtin.InterpPerspCenter.v2f32.i32(i32 268435457) #[[ATTR4]] -; IR-NEXT: [[TMP2:%.*]] = call float (...) @lgc.input.import.interpolated.f32(i1 false, i32 7, i32 0, i32 0, i32 poison, i32 0, <2 x float> [[INTERPPERSPCENTER2]]) +; IR-NEXT: [[TMP2:%.*]] = call float (...) @lgc.input.import.interpolated__f32(i1 false, i32 7, i32 0, i32 0, i32 poison, i32 0, <2 x float> [[INTERPPERSPCENTER2]]) ; IR-NEXT: [[INTERPPERSPCENTER3:%.*]] = call <2 x float> @lgc.input.import.builtin.InterpPerspCenter.v2f32.i32(i32 268435457) #[[ATTR4]] -; IR-NEXT: [[TMP3:%.*]] = call float (...) @lgc.input.import.interpolated.f32(i1 false, i32 6, i32 0, i32 0, i32 poison, i32 0, <2 x float> [[INTERPPERSPCENTER3]]) +; IR-NEXT: [[TMP3:%.*]] = call float (...) @lgc.input.import.interpolated__f32(i1 false, i32 6, i32 0, i32 0, i32 poison, i32 0, <2 x float> [[INTERPPERSPCENTER3]]) ; IR-NEXT: [[INTERPPERSPCENTER4:%.*]] = call <2 x float> @lgc.input.import.builtin.InterpPerspCenter.v2f32.i32(i32 268435457) #[[ATTR4]] -; IR-NEXT: [[TMP4:%.*]] = call float (...) @lgc.input.import.interpolated.f32(i1 false, i32 5, i32 0, i32 0, i32 poison, i32 0, <2 x float> [[INTERPPERSPCENTER4]]) +; IR-NEXT: [[TMP4:%.*]] = call float (...) 
@lgc.input.import.interpolated__f32(i1 false, i32 5, i32 0, i32 0, i32 poison, i32 0, <2 x float> [[INTERPPERSPCENTER4]]) ; IR-NEXT: [[INTERPPERSPCENTER5:%.*]] = call <2 x float> @lgc.input.import.builtin.InterpPerspCenter.v2f32.i32(i32 268435457) #[[ATTR4]] -; IR-NEXT: [[TMP5:%.*]] = call float (...) @lgc.input.import.interpolated.f32(i1 false, i32 4, i32 0, i32 0, i32 poison, i32 0, <2 x float> [[INTERPPERSPCENTER5]]) +; IR-NEXT: [[TMP5:%.*]] = call float (...) @lgc.input.import.interpolated__f32(i1 false, i32 4, i32 0, i32 0, i32 poison, i32 0, <2 x float> [[INTERPPERSPCENTER5]]) ; IR-NEXT: [[INTERPPERSPCENTER6:%.*]] = call <2 x float> @lgc.input.import.builtin.InterpPerspCenter.v2f32.i32(i32 268435457) #[[ATTR4]] -; IR-NEXT: [[TMP6:%.*]] = call float (...) @lgc.input.import.interpolated.f32(i1 false, i32 3, i32 0, i32 0, i32 poison, i32 0, <2 x float> [[INTERPPERSPCENTER6]]) +; IR-NEXT: [[TMP6:%.*]] = call float (...) @lgc.input.import.interpolated__f32(i1 false, i32 3, i32 0, i32 0, i32 poison, i32 0, <2 x float> [[INTERPPERSPCENTER6]]) ; IR-NEXT: [[INTERPPERSPCENTER7:%.*]] = call <2 x float> @lgc.input.import.builtin.InterpPerspCenter.v2f32.i32(i32 268435457) #[[ATTR4]] -; IR-NEXT: [[TMP7:%.*]] = call float (...) @lgc.input.import.interpolated.f32(i1 false, i32 2, i32 0, i32 0, i32 poison, i32 0, <2 x float> [[INTERPPERSPCENTER7]]) +; IR-NEXT: [[TMP7:%.*]] = call float (...) @lgc.input.import.interpolated__f32(i1 false, i32 2, i32 0, i32 0, i32 poison, i32 0, <2 x float> [[INTERPPERSPCENTER7]]) ; IR-NEXT: [[INTERPPERSPCENTER8:%.*]] = call <2 x float> @lgc.input.import.builtin.InterpPerspCenter.v2f32.i32(i32 268435457) #[[ATTR4]] -; IR-NEXT: [[TMP8:%.*]] = call float (...) @lgc.input.import.interpolated.f32(i1 false, i32 1, i32 0, i32 0, i32 poison, i32 0, <2 x float> [[INTERPPERSPCENTER8]]) +; IR-NEXT: [[TMP8:%.*]] = call float (...) 
@lgc.input.import.interpolated__f32(i1 false, i32 1, i32 0, i32 0, i32 poison, i32 0, <2 x float> [[INTERPPERSPCENTER8]]) ; IR-NEXT: [[INTERPPERSPCENTER9:%.*]] = call <2 x float> @lgc.input.import.builtin.InterpPerspCenter.v2f32.i32(i32 268435457) #[[ATTR4]] -; IR-NEXT: [[TMP9:%.*]] = call float (...) @lgc.input.import.interpolated.f32(i1 false, i32 0, i32 0, i32 0, i32 poison, i32 0, <2 x float> [[INTERPPERSPCENTER9]]) +; IR-NEXT: [[TMP9:%.*]] = call float (...) @lgc.input.import.interpolated__f32(i1 false, i32 0, i32 0, i32 0, i32 poison, i32 0, <2 x float> [[INTERPPERSPCENTER9]]) ; IR-NEXT: [[TMP10:%.*]] = fadd reassoc nnan nsz arcp contract afn float [[TMP9]], [[TMP8]] ; IR-NEXT: [[TMP11:%.*]] = fadd reassoc nnan nsz arcp contract afn float [[TMP10]], [[TMP7]] ; IR-NEXT: [[TMP12:%.*]] = fadd reassoc nnan nsz arcp contract afn float [[TMP11]], [[TMP6]] @@ -116,16 +116,16 @@ define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !spi ; IR-NEXT: ret void ; .entry: - %0 = call float (...) @lgc.create.read.generic.input.f32(i32 9, i32 0, i32 0, i32 0, i32 16, i32 poison) - %1 = call float (...) @lgc.create.read.generic.input.f32(i32 8, i32 0, i32 0, i32 0, i32 16, i32 poison) - %2 = call float (...) @lgc.create.read.generic.input.f32(i32 7, i32 0, i32 0, i32 0, i32 16, i32 poison) - %3 = call float (...) @lgc.create.read.generic.input.f32(i32 6, i32 0, i32 0, i32 0, i32 16, i32 poison) - %4 = call float (...) @lgc.create.read.generic.input.f32(i32 5, i32 0, i32 0, i32 0, i32 16, i32 poison) - %5 = call float (...) @lgc.create.read.generic.input.f32(i32 4, i32 0, i32 0, i32 0, i32 16, i32 poison) - %6 = call float (...) @lgc.create.read.generic.input.f32(i32 3, i32 0, i32 0, i32 0, i32 16, i32 poison) - %7 = call float (...) @lgc.create.read.generic.input.f32(i32 2, i32 0, i32 0, i32 0, i32 16, i32 poison) - %8 = call float (...) @lgc.create.read.generic.input.f32(i32 1, i32 0, i32 0, i32 0, i32 16, i32 poison) - %9 = call float (...) 
@lgc.create.read.generic.input.f32(i32 0, i32 0, i32 0, i32 0, i32 16, i32 poison) + %0 = call float (...) @lgc.create.read.generic.input__f32(i32 9, i32 0, i32 0, i32 0, i32 16, i32 poison) + %1 = call float (...) @lgc.create.read.generic.input__f32(i32 8, i32 0, i32 0, i32 0, i32 16, i32 poison) + %2 = call float (...) @lgc.create.read.generic.input__f32(i32 7, i32 0, i32 0, i32 0, i32 16, i32 poison) + %3 = call float (...) @lgc.create.read.generic.input__f32(i32 6, i32 0, i32 0, i32 0, i32 16, i32 poison) + %4 = call float (...) @lgc.create.read.generic.input__f32(i32 5, i32 0, i32 0, i32 0, i32 16, i32 poison) + %5 = call float (...) @lgc.create.read.generic.input__f32(i32 4, i32 0, i32 0, i32 0, i32 16, i32 poison) + %6 = call float (...) @lgc.create.read.generic.input__f32(i32 3, i32 0, i32 0, i32 0, i32 16, i32 poison) + %7 = call float (...) @lgc.create.read.generic.input__f32(i32 2, i32 0, i32 0, i32 0, i32 16, i32 poison) + %8 = call float (...) @lgc.create.read.generic.input__f32(i32 1, i32 0, i32 0, i32 0, i32 16, i32 poison) + %9 = call float (...) @lgc.create.read.generic.input__f32(i32 0, i32 0, i32 0, i32 0, i32 16, i32 poison) %10 = fadd reassoc nnan nsz arcp contract afn float %9, %8 %11 = fadd reassoc nnan nsz arcp contract afn float %10, %7 %12 = fadd reassoc nnan nsz arcp contract afn float %11, %6 @@ -141,7 +141,7 @@ define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !spi ret void } -declare float @lgc.create.read.generic.input.f32(...) local_unnamed_addr #1 +declare float @lgc.create.read.generic.input__f32(...) 
local_unnamed_addr #1 attributes #0 = { nounwind } attributes #1 = { nounwind willreturn } diff --git a/lgc/test/ScalarizeInputWithDynamicIndexUser.lgc b/lgc/test/ScalarizeInputWithDynamicIndexUser.lgc index 35a1a10e71..3800da232d 100644 --- a/lgc/test/ScalarizeInputWithDynamicIndexUser.lgc +++ b/lgc/test/ScalarizeInputWithDynamicIndexUser.lgc @@ -10,9 +10,9 @@ target triple = "amdgcn--amdpal" ; Function Attrs: nounwind define dllexport spir_func void @lgc.shader.VS.main() local_unnamed_addr #0 !lgc.shaderstage !15 { .entry: - %0 = call <4 x float> (...) @lgc.create.read.generic.input.v4f32(i32 2, i32 0, i32 0, i32 0, i32 0, i32 poison) - %1 = call <4 x float> (...) @lgc.create.read.generic.input.v4f32(i32 1, i32 0, i32 0, i32 0, i32 0, i32 poison) - %2 = call <4 x float> (...) @lgc.create.read.generic.input.v4f32(i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison) + %0 = call <4 x float> (...) @lgc.create.read.generic.input__v4f32(i32 2, i32 0, i32 0, i32 0, i32 0, i32 poison) + %1 = call <4 x float> (...) @lgc.create.read.generic.input__v4f32(i32 1, i32 0, i32 0, i32 0, i32 0, i32 poison) + %2 = call <4 x float> (...) @lgc.create.read.generic.input__v4f32(i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison) call void (...) @lgc.create.write.generic.output(<4 x float> %2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef) call void (...) @lgc.create.write.generic.output(<4 x float> %1, i32 1, i32 0, i32 0, i32 0, i32 0, i32 undef) call void (...) @lgc.create.write.generic.output(<4 x float> %0, i32 2, i32 0, i32 0, i32 0, i32 0, i32 undef) @@ -20,7 +20,7 @@ define dllexport spir_func void @lgc.shader.VS.main() local_unnamed_addr #0 !lgc } ; Function Attrs: nounwind readonly willreturn -declare <4 x float> @lgc.create.read.generic.input.v4f32(...) local_unnamed_addr #1 +declare <4 x float> @lgc.create.read.generic.input__v4f32(...) local_unnamed_addr #1 ; Function Attrs: nounwind declare void @lgc.create.write.generic.output(...) 
local_unnamed_addr #0 @@ -30,13 +30,13 @@ define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !lgc ; CHECK-LABEL: @lgc.shader.FS.main( ; CHECK-NEXT: .entry: ; CHECK-NEXT: [[INTERPPERSPCENTER:%.*]] = call <2 x float> @lgc.input.import.builtin.InterpPerspCenter.v2f32.i32(i32 268435457) #[[ATTR1:[0-9]+]] -; CHECK-NEXT: [[TMP0:%.*]] = call float (...) @lgc.input.import.interpolated.f32(i1 false, i32 2, i32 0, i32 0, i32 poison, i32 0, <2 x float> [[INTERPPERSPCENTER]]) +; CHECK-NEXT: [[TMP0:%.*]] = call float (...) @lgc.input.import.interpolated__f32(i1 false, i32 2, i32 0, i32 0, i32 poison, i32 0, <2 x float> [[INTERPPERSPCENTER]]) ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0 -; CHECK-NEXT: [[TMP2:%.*]] = call float (...) @lgc.input.import.interpolated.f32(i1 false, i32 2, i32 0, i32 1, i32 poison, i32 0, <2 x float> [[INTERPPERSPCENTER]]) +; CHECK-NEXT: [[TMP2:%.*]] = call float (...) @lgc.input.import.interpolated__f32(i1 false, i32 2, i32 0, i32 1, i32 poison, i32 0, <2 x float> [[INTERPPERSPCENTER]]) ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP1]], float [[TMP2]], i64 1 -; CHECK-NEXT: [[TMP4:%.*]] = call float (...) @lgc.input.import.interpolated.f32(i1 false, i32 2, i32 0, i32 2, i32 poison, i32 0, <2 x float> [[INTERPPERSPCENTER]]) +; CHECK-NEXT: [[TMP4:%.*]] = call float (...) @lgc.input.import.interpolated__f32(i1 false, i32 2, i32 0, i32 2, i32 poison, i32 0, <2 x float> [[INTERPPERSPCENTER]]) ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> [[TMP3]], float [[TMP4]], i64 2 -; CHECK-NEXT: [[TMP6:%.*]] = call float (...) @lgc.input.import.interpolated.f32(i1 false, i32 2, i32 0, i32 3, i32 poison, i32 0, <2 x float> [[INTERPPERSPCENTER]]) +; CHECK-NEXT: [[TMP6:%.*]] = call float (...) 
@lgc.input.import.interpolated__f32(i1 false, i32 2, i32 0, i32 3, i32 poison, i32 0, <2 x float> [[INTERPPERSPCENTER]]) ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[TMP5]], float [[TMP6]], i64 3 ; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 12816, i32 0, i32 4) ; CHECK-NEXT: [[TMP9:%.*]] = select i1 false, i32 12816, i32 [[TMP8]] @@ -47,8 +47,8 @@ define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !lgc ; CHECK-NEXT: ret void ; .entry: - %input0 = call <4 x float> (...) @lgc.create.read.generic.input.v4f32(i32 2, i32 0, i32 0, i32 0, i32 16, i32 poison) - %idx0 = call i32 (...) @lgc.create.extract.bit.field.i32(i32 12816, i32 0, i32 4, i1 false) + %input0 = call <4 x float> (...) @lgc.create.read.generic.input__v4f32(i32 2, i32 0, i32 0, i32 0, i32 16, i32 poison) + %idx0 = call i32 (...) @lgc.create.extract.bit.field__i32(i32 12816, i32 0, i32 4, i1 false) %input1 = extractelement <4 x float> %input0, i32 %idx0 %output = insertelement <4 x float> undef, float %input1, i32 0 call void (...) @lgc.create.write.generic.output(<4 x float> %output, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef) @@ -56,7 +56,7 @@ define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !lgc } ; Function Attrs: nounwind readnone -declare i32 @lgc.create.extract.bit.field.i32(...) local_unnamed_addr #3 +declare i32 @lgc.create.extract.bit.field__i32(...) 
local_unnamed_addr #3 attributes #0 = { nounwind } attributes #1 = { nounwind readonly willreturn } diff --git a/lgc/test/ShaderStages.lgc b/lgc/test/ShaderStages.lgc index 2e58f2bf43..ce9c5c6ab3 100644 --- a/lgc/test/ShaderStages.lgc +++ b/lgc/test/ShaderStages.lgc @@ -2,8 +2,8 @@ ; Extract 1: CS ; RUN: lgc -extract=1 -print-after=lgc-patch-setup-target-features -mcpu=gfx1010 %s -o /dev/null 2>&1 | FileCheck --check-prefixes=CHECK1,CHECK-NGG1 %s -; CHECK-NGG1: define dllexport amdgpu_cs void @_amdgpu_cs_main{{.*}} !lgc.shaderstage !3 { -; CHECK1: !3 = !{i32 7} +; CHECK-NGG1: define dllexport amdgpu_cs void @_amdgpu_cs_main{{.*}} !lgc.shaderstage [[cs_stage:![0-9]*]] { +; CHECK1: [[cs_stage]] = !{i32 7} define dllexport spir_func void @lgc.shader.CS.main() local_unnamed_addr #0 !lgc.shaderstage !0 { .entry: @@ -29,10 +29,10 @@ attributes #0 = { nounwind } ; RUN: lgc -extract=2 -print-after=lgc-patch-setup-target-features -mcpu=gfx1010 %s -o /dev/null 2>&1 | FileCheck --check-prefixes=CHECK-NGG2 %s -; CHECK-NGG2: define dllexport amdgpu_gs void @_amdgpu_gs_main{{.*}} !lgc.shaderstage !3 { -; CHECK-NGG2: define dllexport amdgpu_ps void @_amdgpu_ps_main{{.*}} !lgc.shaderstage !4 { -; CHECK-NGG2: !3 = !{i32 1} -; CHECK-NGG2: !4 = !{i32 6} +; CHECK-NGG2: define dllexport amdgpu_gs void @_amdgpu_gs_main{{.*}} !lgc.shaderstage [[vert_stage:![0-9]*]] { +; CHECK-NGG2: define dllexport amdgpu_ps void @_amdgpu_ps_main{{.*}} !lgc.shaderstage [[frag_stage:![0-9]*]] { +; CHECK-NGG2: [[vert_stage]] = !{i32 1} +; CHECK-NGG2: [[frag_stage]] = !{i32 6} define dllexport spir_func void @lgc.shader.VS.main() local_unnamed_addr #0 !lgc.shaderstage !0 { .entry: @@ -114,10 +114,10 @@ declare void @lgc.output.export.builtin.PointSize.i32.f32(i32, float) #0 declare void @lgc.output.export.builtin.Position.i32.v4f32(i32, <4 x float>) #0 ; Function Attrs: nounwind readonly -declare float @lgc.input.import.generic.f32.i32.i32.i32(i32, i32, i32) #1 +declare float 
@lgc.input.import.generic__f32.i32.i32.i32(i32, i32, i32) #1 ; Function Attrs: nounwind readonly -declare <4 x double> @lgc.input.import.generic.v4f64.i32.i32.i32(i32, i32, i32) #1 +declare <4 x double> @lgc.input.import.generic__v4f64.i32.i32.i32(i32, i32, i32) #1 ; Function Attrs: nounwind declare i32 @lgc.input.import.builtin.GsWaveId.i32.i32(i32) #0 @@ -151,12 +151,12 @@ attributes #0 = { nounwind } ; RUN: lgc -extract=4 -print-after=lgc-patch-setup-target-features -mcpu=gfx1010 %s -o /dev/null 2>&1 | FileCheck --check-prefixes=CHECK-NGG4 %s -; CHECK-NGG4: define dllexport amdgpu_gs void @_amdgpu_gs_main{{.*}} !lgc.shaderstage !5 { -; CHECK-NGG4: define dllexport amdgpu_hs void @_amdgpu_hs_main{{.*}} !lgc.shaderstage !6 { -; CHECK-NGG4: define dllexport amdgpu_ps void @_amdgpu_ps_main{{.*}} !lgc.shaderstage !7 { -; CHECK-NGG4: !5 = !{i32 3} -; CHECK-NGG4: !6 = !{i32 2} -; CHECK-NGG4: !7 = !{i32 6} +; CHECK-NGG4: define dllexport amdgpu_gs void @_amdgpu_gs_main{{.*}} !lgc.shaderstage [[tc_stage:![0-9]*]] { +; CHECK-NGG4: define dllexport amdgpu_hs void @_amdgpu_hs_main{{.*}} !lgc.shaderstage [[te_stage:![0-9]*]] { +; CHECK-NGG4: define dllexport amdgpu_ps void @_amdgpu_ps_main{{.*}} !lgc.shaderstage [[frag_stage:![0-9]*]] { +; CHECK-NGG4: [[tc_stage]] = !{i32 3} +; CHECK-NGG4: [[te_stage]] = !{i32 2} +; CHECK-NGG4: [[frag_stage]] = !{i32 6} define dllexport spir_func void @lgc.shader.TCS.main() local_unnamed_addr #0 !lgc.shaderstage !5 { .entry: @@ -222,8 +222,8 @@ attributes #1 = { nounwind readonly } ; RUN: lgc -extract=5 -print-after=lgc-patch-setup-target-features -mcpu=gfx1010 %s -o /dev/null 2>&1 | FileCheck --check-prefixes=CHECK-NGG5 %s -; CHECK-NGG5: define dllexport amdgpu_hs void @_amdgpu_hs_main{{.*}} !lgc.shaderstage !5 { -; CHECK-NGG5: !5 = !{i32 2} +; CHECK-NGG5: define dllexport amdgpu_hs void @_amdgpu_hs_main{{.*}} !lgc.shaderstage [[tc_stage:![0-9]*]] { +; CHECK-NGG5: [[tc_stage]] = !{i32 2} define dllexport spir_func void 
@lgc.shader.TCS.main() local_unnamed_addr #0 !lgc.shaderstage !5 { .entry: @@ -275,10 +275,10 @@ attributes #1 = { nounwind readonly } ; RUN: lgc -extract=6 -print-after=lgc-patch-setup-target-features -mcpu=gfx1010 %s -o /dev/null 2>&1 | FileCheck --check-prefixes=CHECK-NGG6 %s -; CHECK-NGG6: define dllexport amdgpu_gs void @_amdgpu_gs_main{{.*}} !lgc.shaderstage !5 { -; CHECK-NGG6: define dllexport amdgpu_ps void @_amdgpu_ps_main{{.*}} !lgc.shaderstage !6 { -; CHECK-NGG6: !5 = !{i32 3} -; CHECK-NGG6: !6 = !{i32 6} +; CHECK-NGG6: define dllexport amdgpu_gs void @_amdgpu_gs_main{{.*}} !lgc.shaderstage [[te_stage:![0-9]*]] { +; CHECK-NGG6: define dllexport amdgpu_ps void @_amdgpu_ps_main{{.*}} !lgc.shaderstage [[frag_stage:![0-9]*]] { +; CHECK-NGG6: [[te_stage]] = !{i32 3} +; CHECK-NGG6: [[frag_stage]] = !{i32 6} define dllexport spir_func void @lgc.shader.TES.main() local_unnamed_addr #0 !lgc.shaderstage !5 { .entry: @@ -423,10 +423,10 @@ declare void @lgc.output.export.builtin.PointSize.i32.f32(i32, float) #0 declare void @lgc.output.export.builtin.Position.i32.v4f32(i32, <4 x float>) #0 ; Function Attrs: nounwind readonly -declare float @lgc.input.import.generic.f32.i32.i32.i32(i32, i32, i32) #1 +declare float @lgc.input.import.generic__f32.i32.i32.i32(i32, i32, i32) #1 ; Function Attrs: nounwind readonly -declare <4 x double> @lgc.input.import.generic.v4f64.i32.i32.i32(i32, i32, i32) #1 +declare <4 x double> @lgc.input.import.generic__v4f64.i32.i32.i32(i32, i32, i32) #1 ; Function Attrs: nounwind declare i32 @lgc.input.import.builtin.GsWaveId.i32.i32(i32) #0 diff --git a/lgc/test/TestWaterfallLoopForStruct.lgc b/lgc/test/TestWaterfallLoopForStruct.lgc index 7b88e9ccdc..448a7ceee2 100644 --- a/lgc/test/TestWaterfallLoopForStruct.lgc +++ b/lgc/test/TestWaterfallLoopForStruct.lgc @@ -8,9 +8,9 @@ target triple = "amdgcn--amdpal" ; Function Attrs: nounwind define dllexport spir_func void @lgc.shader.VS.main() local_unnamed_addr #0 !spirv.ExecutionModel !11 
!lgc.shaderstage !1 { .entry: - %0 = call i32 (...) @lgc.create.read.generic.input.i32(i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison) + %0 = call i32 (...) @lgc.create.read.generic.input__i32(i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison) %1 = call ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) - %2 = call i32 (...) @lgc.create.get.desc.stride.i32(i32 1, i32 1, i64 0, i32 0) + %2 = call i32 (...) @lgc.create.get.desc.stride__i32(i32 1, i32 1, i64 0, i32 0) %3 = insertvalue { ptr addrspace(4), i32, i32, i32 } poison, i32 %2, 1 %4 = insertvalue { ptr addrspace(4), i32, i32, i32 } %3, i32 32, 2 %5 = insertvalue { ptr addrspace(4), i32, i32, i32 } %4, i32 1, 3 @@ -35,7 +35,7 @@ declare spir_func void @spirv.NonUniform.i32(i32) local_unnamed_addr declare ptr addrspace(4) @lgc.create.get.desc.ptr.p4(...) local_unnamed_addr #1 ; Function Attrs: nounwind memory(none) -declare i32 @lgc.create.get.desc.stride.i32(...) local_unnamed_addr #1 +declare i32 @lgc.create.get.desc.stride__i32(...) local_unnamed_addr #1 declare spir_func void @"spirv.NonUniform.s[p4,i32,i32,i32]"({ ptr addrspace(4), i32, i32, i32 }) local_unnamed_addr @@ -45,7 +45,7 @@ declare spir_func void @spirv.NonUniform.a3v8i32([3 x <8 x i32>]) local_unnamed_ declare { <4 x float>, i32 } @"lgc.create.image.load.s[v4f32,i32]"(...) local_unnamed_addr #2 ; Function Attrs: nounwind willreturn memory(read) -declare i32 @lgc.create.read.generic.input.i32(...) local_unnamed_addr #2 +declare i32 @lgc.create.read.generic.input__i32(...) local_unnamed_addr #2 ; Function Attrs: nounwind declare void @lgc.create.write.generic.output(...) 
local_unnamed_addr #0 @@ -80,8 +80,8 @@ attributes #2 = { nounwind willreturn memory(read) } ; CHECK-NEXT: .entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[TMP0]] to <2 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @lgc.input.import.generic.i32(i1 false, i32 0, i32 0, i32 0, i32 poison) -; CHECK-NEXT: [[TMP3:%.*]] = call i32 @lgc.load.user.data.i32(i32 0) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @lgc.input.import.generic__i32(i1 false, i32 0, i32 0, i32 0, i32 poison) +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @lgc.load.user.data__i32(i32 0) ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP3]], i64 0 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr addrspace(4) diff --git a/lgc/test/TextureRange.lgc b/lgc/test/TextureRange.lgc index 3cc0b9881a..0f50ab4c50 100644 --- a/lgc/test/TextureRange.lgc +++ b/lgc/test/TextureRange.lgc @@ -1,13 +1,13 @@ ; RUN: lgc %s -print-after=lgc-lower-desc -o /dev/null 2>&1 - <%s | FileCheck --check-prefixes=CHECK %s -; CHECK: call <2 x i32> @lgc.load.user.data.v2i32(i32 24) +; CHECK: call <2 x i32> @lgc.load.user.data__v2i32(i32 24) ; CHECK: call ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32> ; CHECK: [[varindex0:%[0-9]+]] = call ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32> ; CHECK: [[varindex1:%[0-9]+]] = load i32, ptr addrspace(7) [[varindex0]], align 4 ; CHECK-NEXT: [[varindex2:%[0-9]+]] = sext i32 [[varindex1]] to i64 ; CHECK-NEXT: getelementptr <{ [4294967295 x float] }>, ptr addrspace(7) %{{.*}}, i64 0, i32 0, i64 [[varindex2]] -; CHECK: [[desc1lo:%[0-9]+]] = call i32 @lgc.load.user.data.i32(i32 4 +; CHECK: [[desc1lo:%[0-9]+]] = call i32 @lgc.load.user.data__i32(i32 4 ; CHECK-NEXT: [[desc1vec:%[0-9]+]] = insertelement <2 x i32> %{{[^,]+}}, i32 [[desc1lo]], i64 0 ; CHECK-NEXT: [[desc1lohi:%[0-9]+]] = bitcast <2 x i32> [[desc1vec]] to i64 ; CHECK-NEXT: [[desc1:%[0-9]+]] = 
inttoptr i64 [[desc1lohi]] to ptr addrspace(4) @@ -77,7 +77,7 @@ define dllexport spir_func void @lgc.shader.FS.PSMain() local_unnamed_addr #0 !s %3 = bitcast i8 addrspace(7)* %2 to <{ [4294967295 x float] }> addrspace(7)* %4 = call ptr addrspace(7) @lgc.load.buffer.desc(i64 2684354560, i32 3, i32 0, i32 0) %5 = call {}* @llvm.invariant.start.p7(i64 -1, ptr addrspace(7) %4) - %6 = call <2 x float> (...) @lgc.create.read.generic.input.v2f32(i32 0, i32 0, i32 0, i32 0, i32 16, i32 poison) + %6 = call <2 x float> (...) @lgc.create.read.generic.input__v2f32(i32 0, i32 0, i32 0, i32 0, i32 16, i32 poison) %7 = load i32, ptr addrspace(7) %4, align 4 %8 = sext i32 %7 to i64 %9 = getelementptr <{ [4294967295 x float] }>, ptr addrspace(7) %2, i64 0, i32 0, i64 %8 @@ -86,12 +86,12 @@ define dllexport spir_func void @lgc.shader.FS.PSMain() local_unnamed_addr #0 !s %12 = load float, ptr addrspace(7) %11, align 4 %13 = fmul reassoc nnan nsz arcp contract afn float %10, %12 %14 = call ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 3221225472, i32 1) - %15 = call i32 (...) @lgc.create.get.desc.stride.i32(i32 1, i32 1, i64 3221225472, i32 1) + %15 = call i32 (...) @lgc.create.get.desc.stride__i32(i32 1, i32 1, i64 3221225472, i32 1) %16 = load <8 x i32>, ptr addrspace(4) %14, align 32 %17 = call ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4(i32 2, i32 2, i64 2147483648, i32 0) - %18 = call i32 (...) @lgc.create.get.desc.stride.i32(i32 2, i32 2, i64 2147483648, i32 0) + %18 = call i32 (...) @lgc.create.get.desc.stride__i32(i32 2, i32 2, i64 2147483648, i32 0) %19 = load <4 x i32>, ptr addrspace(4) %17, align 16 - %20 = call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 512, <8 x i32> %16, <4 x i32> %19, i32 1, <2 x float> %6) + %20 = call reassoc nnan nsz arcp contract afn <4 x float> (...) 
@lgc.create.image.sample__v4f32(i32 1, i32 512, <8 x i32> %16, <4 x i32> %19, i32 1, <2 x float> %6) %.splatinsert = insertelement <4 x float> poison, float %13, i64 0 %21 = shufflevector <4 x float> %.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer %scale = fmul reassoc nnan nsz arcp contract afn <4 x float> %20, %21 @@ -106,16 +106,16 @@ declare ptr addrspace(7) @lgc.load.buffer.desc(i64, i32, i32, i32) local_unnamed declare {}* @llvm.invariant.start.p7(i64 immarg, ptr addrspace(7) nocapture) #2 ; Function Attrs: nounwind readonly willreturn -declare <2 x float> @lgc.create.read.generic.input.v2f32(...) local_unnamed_addr #1 +declare <2 x float> @lgc.create.read.generic.input__v2f32(...) local_unnamed_addr #1 ; Function Attrs: nounwind readnone declare ptr addrspace(4) @lgc.create.get.desc.ptr.p4(...) local_unnamed_addr #3 ; Function Attrs: nounwind readnone -declare i32 @lgc.create.get.desc.stride.i32(...) local_unnamed_addr #3 +declare i32 @lgc.create.get.desc.stride__i32(...) local_unnamed_addr #3 ; Function Attrs: nounwind readonly willreturn -declare <4 x float> @lgc.create.image.sample.v4f32(...) local_unnamed_addr #1 +declare <4 x float> @lgc.create.image.sample__v4f32(...) local_unnamed_addr #1 ; Function Attrs: nounwind declare void @lgc.create.write.generic.output(...) local_unnamed_addr #0 diff --git a/lgc/test/Transforms/Continufy/simple.lgc b/lgc/test/Transforms/Continufy/simple.lgc index 0d7e4cef86..f9e5197e16 100644 --- a/lgc/test/Transforms/Continufy/simple.lgc +++ b/lgc/test/Transforms/Continufy/simple.lgc @@ -49,7 +49,7 @@ declare i32 @lgc.shader.input.LocalInvocationId(i32) ; CHECK-NEXT: [[DST:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[P16]], align 8 ; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[FN]] to i32 ; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = call [2 x i32] (...) 
@lgc.cps.await.a2i32(i32 [[TMP2]], i32 4, i32 poison, i32 [[X]], ptr addrspace(1) [[DST]]) +; CHECK-NEXT: [[TMP3:%.*]] = call [2 x i32] (...) @lgc.cps.await__a2i32(i32 [[TMP2]], i32 4, i32 poison, i32 [[X]], ptr addrspace(1) [[DST]]) ; CHECK-NEXT: store [2 x i32] [[TMP3]], ptr addrspace(1) [[DST]], align 4 ; CHECK-NEXT: ret void ; @@ -60,7 +60,7 @@ declare i32 @lgc.shader.input.LocalInvocationId(i32) ; CHECK-NEXT: [[FN:%.*]] = load ptr, ptr addrspace(4) [[PUSHCONST]], align 8 ; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[FN]] to i32 ; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = call i32 (...) @lgc.cps.await.i32(i32 [[TMP2]], i32 2, i32 poison, i32 [[X]]) +; CHECK-NEXT: [[TMP3:%.*]] = call i32 (...) @lgc.cps.await__i32(i32 [[TMP2]], i32 2, i32 poison, i32 [[X]]) ; CHECK-NEXT: call void (...) @lgc.cps.jump(i32 [[RCR]], i32 4, {} poison, i32 poison, i32 poison, i32 [[TMP3]]) ; CHECK-NEXT: unreachable ; @@ -74,7 +74,7 @@ declare i32 @lgc.shader.input.LocalInvocationId(i32) ; CHECK-NEXT: [[PUSHCONST:%.*]] = call ptr addrspace(4) @lgc.user.data(i32 32) ; CHECK-NEXT: [[FN:%.*]] = load ptr, ptr addrspace(4) [[PUSHCONST]], align 8 ; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[FN]] to i32 -; CHECK-NEXT: call void (...) @lgc.cps.await.isVoid(i32 [[TMP0]], i32 1, i32 poison) +; CHECK-NEXT: call void (...) 
@lgc.cps.await__isVoid(i32 [[TMP0]], i32 1, i32 poison) ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void diff --git a/lgc/test/Transforms/CpsLowering/cps-entry-point.lgc b/lgc/test/Transforms/CpsLowering/cps-entry-point.lgc index f1824f7734..7a2bc5bef7 100644 --- a/lgc/test/Transforms/CpsLowering/cps-entry-point.lgc +++ b/lgc/test/Transforms/CpsLowering/cps-entry-point.lgc @@ -9,7 +9,7 @@ declare ptr addrspace(32) @lgc.cps.get.vsp() #2 define dllexport spir_func void @lgc.shader.CS.main() local_unnamed_addr #0 !lgc.shaderstage !3 { .entry: - %desc = call <4 x i32> @lgc.load.user.data.v4i32(i32 0) + %desc = call <4 x i32> @lgc.load.user.data__v4i32(i32 0) %ptr = call ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32> %desc) %p0 = getelementptr i32, ptr addrspace(7) %ptr, i32 0 %i_vsp = load i32, ptr addrspace(7) %p0, align 4 @@ -30,7 +30,7 @@ define dllexport spir_func void @lgc.shader.CS.main() local_unnamed_addr #0 !lgc unreachable } -declare <4 x i32> @lgc.load.user.data.v4i32(i32) #4 +declare <4 x i32> @lgc.load.user.data__v4i32(i32) #4 declare ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32>) #5 diff --git a/lgc/test/Transforms/CpsLowering/cps-from-continufy.lgc b/lgc/test/Transforms/CpsLowering/cps-from-continufy.lgc index 6282fab94a..11e6485e85 100644 --- a/lgc/test/Transforms/CpsLowering/cps-from-continufy.lgc +++ b/lgc/test/Transforms/CpsLowering/cps-from-continufy.lgc @@ -13,7 +13,7 @@ define spir_func void @_rgen_1({} %state, i32 %rcr) #0 !spirv.ExecutionModel !15 %4 = bitcast i64 %3 to <2 x i32> %5 = call i64 @llvm.amdgcn.s.getpc() %6 = bitcast i64 %5 to <2 x i32> - %7 = call i32 @lgc.load.user.data.i32(i32 20) + %7 = call i32 @lgc.load.user.data__i32(i32 20) %8 = insertelement <2 x i32> %6, i32 %7, i64 0 %9 = bitcast <2 x i32> %8 to i64 %10 = inttoptr i64 %9 to ptr addrspace(4) @@ -27,7 +27,7 @@ define spir_func void @_rgen_1({} %state, i32 %rcr) #0 !spirv.ExecutionModel !15 %18 = insertelement <4 x i32> %17, i32 -1, i64 2 %19 = 
insertelement <4 x i32> %18, i32 553734060, i64 3 %20 = call ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32> %19) - %21 = call i32 @lgc.load.user.data.i32(i32 0) + %21 = call i32 @lgc.load.user.data__i32(i32 0) %22 = insertelement <2 x i32> %4, i32 %21, i64 0 %23 = bitcast <2 x i32> %22 to i64 %24 = inttoptr i64 %23 to ptr addrspace(4) @@ -36,7 +36,7 @@ define spir_func void @_rgen_1({} %state, i32 %rcr) #0 !spirv.ExecutionModel !15 %27 = call ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32> %26) %28 = getelementptr inbounds %_rgen_1.Frame, ptr addrspace(32) %0, i32 0, i32 0 store ptr addrspace(7) %27, ptr addrspace(32) %28, align 32 - %29 = call i32 @lgc.load.user.data.i32(i32 0) + %29 = call i32 @lgc.load.user.data__i32(i32 0) %30 = insertelement <2 x i32> %2, i32 %29, i64 0 %31 = bitcast <2 x i32> %30 to i64 %32 = inttoptr i64 %31 to ptr addrspace(4) @@ -76,7 +76,7 @@ entryresume.0: %.reload3 = load ptr addrspace(7), ptr addrspace(32) %5, align 32 %6 = getelementptr inbounds %_rgen_1.Frame, ptr addrspace(32) %3, i32 0, i32 0 %.reload = load ptr addrspace(7), ptr addrspace(32) %6, align 32 - %dummy.udata = call i32 @lgc.load.user.data.i32(i32 20) + %dummy.udata = call i32 @lgc.load.user.data__i32(i32 20) %dummy.gep = getelementptr inbounds %_rgen_1.Frame, ptr addrspace(32) %3, i32 %dummy.udata, i32 0 %dummy.reload = load ptr addrspace(7), ptr addrspace(32) %dummy.gep, align 32 %7 = load volatile i32, ptr addrspace(7) %.reload3, align 4 @@ -87,7 +87,7 @@ entryresume.0: } ; Function Attrs: nounwind willreturn memory(none) -declare i32 @lgc.load.user.data.i32(i32) #1 +declare i32 @lgc.load.user.data__i32(i32) #1 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i64 @llvm.amdgcn.s.getpc() #2 @@ -184,87 +184,90 @@ attributes #7 = { nounwind willreturn memory(inaccessiblemem: read) } ; CHECK-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP32]], i32 32 ; CHECK-NEXT: [[TMP34:%.*]] = load <4 x i32>, ptr 
addrspace(4) [[TMP33]], align 16 ; CHECK-NEXT: [[TMP35:%.*]] = call ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32> [[TMP34]]) -; CHECK-NEXT: [[TMP36:%.*]] = getelementptr i8, ptr addrspace(5) null, i32 [[TMP9]] -; CHECK-NEXT: store ptr addrspace(7) [[TMP35]], ptr addrspace(5) [[TMP36]], align 32 -; CHECK-NEXT: [[TMP37:%.*]] = insertelement <2 x i32> [[TMP12]], i32 [[USERDATA0]], i64 0 -; CHECK-NEXT: [[TMP38:%.*]] = bitcast <2 x i32> [[TMP37]] to i64 -; CHECK-NEXT: [[TMP39:%.*]] = inttoptr i64 [[TMP38]] to ptr addrspace(4) -; CHECK-NEXT: [[TMP40:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP39]], i32 48 -; CHECK-NEXT: [[TMP41:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP40]], align 16 -; CHECK-NEXT: [[TMP42:%.*]] = call ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32> [[TMP41]]) -; CHECK-NEXT: [[TMP43:%.*]] = add i32 [[TMP9]], 32 -; CHECK-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr addrspace(5) null, i32 [[TMP43]] -; CHECK-NEXT: store ptr addrspace(7) [[TMP42]], ptr addrspace(5) [[TMP44]], align 32 -; CHECK-NEXT: [[TMP45:%.*]] = load volatile i32, ptr addrspace(7) [[TMP42]], align 4 -; CHECK-NEXT: [[TMP46:%.*]] = add i32 [[TMP9]], 64 -; CHECK-NEXT: [[TMP47:%.*]] = getelementptr i8, ptr addrspace(5) null, i32 [[TMP46]] -; CHECK-NEXT: store i32 [[TMP45]], ptr addrspace(5) [[TMP47]], align 4 -; CHECK-NEXT: [[TMP48:%.*]] = add i32 [[TMP45]], -37 -; CHECK-NEXT: [[TMP49:%.*]] = getelementptr inbounds i8, ptr addrspace(7) [[TMP29]], i32 52 -; CHECK-NEXT: [[TMP50:%.*]] = load i64, ptr addrspace(7) [[TMP49]], align 8 -; CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds i8, ptr addrspace(7) [[TMP29]], i32 60 -; CHECK-NEXT: [[TMP52:%.*]] = load i32, ptr addrspace(7) [[TMP51]], align 4 -; CHECK-NEXT: [[TMP53:%.*]] = mul i32 [[TMP48]], [[TMP52]] -; CHECK-NEXT: [[TMP54:%.*]] = inttoptr i64 [[TMP50]] to ptr addrspace(1) -; CHECK-NEXT: [[TMP55:%.*]] = sext i32 [[TMP53]] to i64 -; CHECK-NEXT: [[TMP56:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP54]], i64 [[TMP55]] -; 
CHECK-NEXT: [[TMP57:%.*]] = load i64, ptr addrspace(1) [[TMP56]], align 8 -; CHECK-NEXT: [[TMP58:%.*]] = inttoptr i64 [[TMP57]] to ptr -; CHECK-NEXT: [[TMP59:%.*]] = ptrtoint ptr [[TMP58]] to i32 -; CHECK-NEXT: [[TMP60:%.*]] = or i32 [[TMP59]], 1 -; CHECK-NEXT: [[TMP61:%.*]] = inttoptr i32 [[TMP60]] to ptr -; CHECK-NEXT: [[TMP62:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4 -; CHECK-NEXT: [[TMP63:%.*]] = inttoptr i32 [[TMP62]] to ptr addrspace(5) +; CHECK-NEXT: [[TMP36:%.*]] = inttoptr i32 [[TMP9]] to ptr addrspace(5) +; CHECK-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr addrspace(5) [[TMP36]], i32 0 +; CHECK-NEXT: store ptr addrspace(7) [[TMP35]], ptr addrspace(5) [[TMP37]], align 32 +; CHECK-NEXT: [[TMP38:%.*]] = insertelement <2 x i32> [[TMP12]], i32 [[USERDATA0]], i64 0 +; CHECK-NEXT: [[TMP39:%.*]] = bitcast <2 x i32> [[TMP38]] to i64 +; CHECK-NEXT: [[TMP40:%.*]] = inttoptr i64 [[TMP39]] to ptr addrspace(4) +; CHECK-NEXT: [[TMP41:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP40]], i32 48 +; CHECK-NEXT: [[TMP42:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP41]], align 16 +; CHECK-NEXT: [[TMP43:%.*]] = call ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32> [[TMP42]]) +; CHECK-NEXT: [[TMP44:%.*]] = add i32 [[TMP9]], 32 +; CHECK-NEXT: [[TMP45:%.*]] = inttoptr i32 [[TMP44]] to ptr addrspace(5) +; CHECK-NEXT: [[TMP46:%.*]] = getelementptr i8, ptr addrspace(5) [[TMP45]], i32 0 +; CHECK-NEXT: store ptr addrspace(7) [[TMP43]], ptr addrspace(5) [[TMP46]], align 32 +; CHECK-NEXT: [[TMP47:%.*]] = load volatile i32, ptr addrspace(7) [[TMP43]], align 4 +; CHECK-NEXT: [[TMP48:%.*]] = add i32 [[TMP9]], 64 +; CHECK-NEXT: [[TMP49:%.*]] = inttoptr i32 [[TMP48]] to ptr addrspace(5) +; CHECK-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr addrspace(5) [[TMP49]], i32 0 +; CHECK-NEXT: store i32 [[TMP47]], ptr addrspace(5) [[TMP50]], align 4 +; CHECK-NEXT: [[TMP51:%.*]] = add i32 [[TMP47]], -37 +; CHECK-NEXT: [[TMP52:%.*]] = getelementptr inbounds i8, ptr addrspace(7) [[TMP29]], 
i32 52 +; CHECK-NEXT: [[TMP53:%.*]] = load i64, ptr addrspace(7) [[TMP52]], align 8 +; CHECK-NEXT: [[TMP54:%.*]] = getelementptr inbounds i8, ptr addrspace(7) [[TMP29]], i32 60 +; CHECK-NEXT: [[TMP55:%.*]] = load i32, ptr addrspace(7) [[TMP54]], align 4 +; CHECK-NEXT: [[TMP56:%.*]] = mul i32 [[TMP51]], [[TMP55]] +; CHECK-NEXT: [[TMP57:%.*]] = inttoptr i64 [[TMP53]] to ptr addrspace(1) +; CHECK-NEXT: [[TMP58:%.*]] = sext i32 [[TMP56]] to i64 +; CHECK-NEXT: [[TMP59:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP57]], i64 [[TMP58]] +; CHECK-NEXT: [[TMP60:%.*]] = load i64, ptr addrspace(1) [[TMP59]], align 8 +; CHECK-NEXT: [[TMP61:%.*]] = inttoptr i64 [[TMP60]] to ptr +; CHECK-NEXT: [[TMP62:%.*]] = ptrtoint ptr [[TMP61]] to i32 +; CHECK-NEXT: [[TMP63:%.*]] = or i32 [[TMP62]], 1 +; CHECK-NEXT: [[TMP64:%.*]] = inttoptr i32 [[TMP63]] to ptr +; CHECK-NEXT: [[TMP65:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4 +; CHECK-NEXT: [[TMP66:%.*]] = inttoptr i32 [[TMP65]] to ptr addrspace(5) ; CHECK-NEXT: br label [[TAIL_BLOCK:%.*]] ; CHECK: tail.block: -; CHECK-NEXT: [[TMP64:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5), i32, i32, i32 } poison, <3 x i32> [[LOCALINVOCATIONID]], 0 -; CHECK-NEXT: [[TMP65:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5), i32, i32, i32 } [[TMP64]], i32 [[TMP60]], 1 -; CHECK-NEXT: [[TMP66:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5), i32, i32, i32 } [[TMP65]], ptr addrspace(5) [[TMP63]], 2 -; CHECK-NEXT: [[TMP67:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5), i32, i32, i32 } [[TMP66]], i32 ptrtoint (ptr @_rgen_1.resume.0 to i32), 3 -; CHECK-NEXT: [[TMP68:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5), i32, i32, i32 } [[TMP67]], i32 undef, 4 -; CHECK-NEXT: [[TMP69:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5), i32, i32, i32 } [[TMP68]], i32 [[TMP48]], 5 -; CHECK-NEXT: [[TMP70:%.*]] = extractvalue { <3 x i32>, i32, ptr addrspace(5), i32, i32, i32 } [[TMP69]], 1 -; CHECK-NEXT: [[TMP71:%.*]] = call 
i32 @llvm.amdgcn.set.inactive.chain.arg.i32(i32 [[TMP70]], i32 [[VCR]]) -; CHECK-NEXT: [[TMP72:%.*]] = icmp ne i32 [[TMP71]], 0 -; CHECK-NEXT: [[TMP73:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP72]]) -; CHECK-NEXT: [[TMP74:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP73]], i1 true) -; CHECK-NEXT: [[TMP75:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP71]], i32 [[TMP74]]) -; CHECK-NEXT: [[TMP76:%.*]] = icmp eq i32 [[TMP71]], [[TMP75]] -; CHECK-NEXT: [[TMP77:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP76]]) -; CHECK-NEXT: [[TMP78:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP75]]) -; CHECK-NEXT: [[TMP79:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP77]]) -; CHECK-NEXT: [[TMP80:%.*]] = and i32 [[TMP78]], -64 -; CHECK-NEXT: [[TMP81:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP80]], i64 0 -; CHECK-NEXT: [[TMP82:%.*]] = bitcast <2 x i32> [[TMP81]] to i64 -; CHECK-NEXT: [[TMP83:%.*]] = inttoptr i64 [[TMP82]] to ptr -; CHECK-NEXT: [[TMP84:%.*]] = ptrtoint ptr addrspace(4) [[NUMWORKGROUPSPTR]] to i64 -; CHECK-NEXT: [[TMP85:%.*]] = bitcast i64 [[TMP84]] to <2 x i32> -; CHECK-NEXT: [[TMP86:%.*]] = extractelement <2 x i32> [[TMP85]], i64 0 -; CHECK-NEXT: [[TMP87:%.*]] = extractelement <2 x i32> [[TMP85]], i64 1 -; CHECK-NEXT: [[TMP88:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 0 -; CHECK-NEXT: [[TMP89:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 1 -; CHECK-NEXT: [[TMP90:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 2 -; CHECK-NEXT: [[TMP91:%.*]] = insertelement <20 x i32> poison, i32 [[GLOBALTABLE]], i64 0 -; CHECK-NEXT: [[TMP92:%.*]] = insertelement <20 x i32> [[TMP91]], i32 [[TMP86]], i64 1 -; CHECK-NEXT: [[TMP93:%.*]] = insertelement <20 x i32> [[TMP92]], i32 [[TMP87]], i64 2 -; CHECK-NEXT: [[TMP94:%.*]] = insertelement <20 x i32> [[TMP93]], i32 [[USERDATA0]], i64 3 -; CHECK-NEXT: [[TMP95:%.*]] = insertelement <20 x i32> [[TMP94]], i32 [[USERDATA1]], i64 4 -; CHECK-NEXT: [[TMP96:%.*]] = insertelement <20 x i32> [[TMP95]], i32 
[[USERDATA2]], i64 5 -; CHECK-NEXT: [[TMP97:%.*]] = insertelement <20 x i32> [[TMP96]], i32 [[USERDATA3]], i64 6 -; CHECK-NEXT: [[TMP98:%.*]] = insertelement <20 x i32> [[TMP97]], i32 [[USERDATA4]], i64 7 -; CHECK-NEXT: [[TMP99:%.*]] = insertelement <20 x i32> [[TMP98]], i32 [[USERDATA5]], i64 8 -; CHECK-NEXT: [[TMP100:%.*]] = insertelement <20 x i32> [[TMP99]], i32 [[PAD6]], i64 9 -; CHECK-NEXT: [[TMP101:%.*]] = insertelement <20 x i32> [[TMP100]], i32 [[PAD7]], i64 10 -; CHECK-NEXT: [[TMP102:%.*]] = insertelement <20 x i32> [[TMP101]], i32 [[PAD8]], i64 11 -; CHECK-NEXT: [[TMP103:%.*]] = insertelement <20 x i32> [[TMP102]], i32 [[PAD9]], i64 12 -; CHECK-NEXT: [[TMP104:%.*]] = insertelement <20 x i32> [[TMP103]], i32 [[PAD10]], i64 13 -; CHECK-NEXT: [[TMP105:%.*]] = insertelement <20 x i32> [[TMP104]], i32 [[PAD11]], i64 14 -; CHECK-NEXT: [[TMP106:%.*]] = insertelement <20 x i32> [[TMP105]], i32 [[SPILLTABLE]], i64 15 -; CHECK-NEXT: [[TMP107:%.*]] = insertelement <20 x i32> [[TMP106]], i32 [[TMP88]], i64 16 -; CHECK-NEXT: [[TMP108:%.*]] = insertelement <20 x i32> [[TMP107]], i32 [[TMP89]], i64 17 -; CHECK-NEXT: [[TMP109:%.*]] = insertelement <20 x i32> [[TMP108]], i32 [[TMP90]], i64 18 -; CHECK-NEXT: [[TMP110:%.*]] = insertelement <20 x i32> [[TMP109]], i32 [[MULTIDISPATCHINFO]], i64 19 -; CHECK-NEXT: call void (ptr, i32, <20 x i32>, { <3 x i32>, i32, ptr addrspace(5), i32, i32, i32 }, i32, ...) 
@llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_v3i32i32p5i32i32i32s(ptr inreg [[TMP83]], i32 inreg [[TMP79]], <20 x i32> inreg [[TMP110]], { <3 x i32>, i32, ptr addrspace(5), i32, i32, i32 } [[TMP69]], i32 0) +; CHECK-NEXT: [[TMP67:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5), i32, i32, i32 } poison, <3 x i32> [[LOCALINVOCATIONID]], 0 +; CHECK-NEXT: [[TMP68:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5), i32, i32, i32 } [[TMP67]], i32 [[TMP63]], 1 +; CHECK-NEXT: [[TMP69:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5), i32, i32, i32 } [[TMP68]], ptr addrspace(5) [[TMP66]], 2 +; CHECK-NEXT: [[TMP70:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5), i32, i32, i32 } [[TMP69]], i32 ptrtoint (ptr @_rgen_1.resume.0 to i32), 3 +; CHECK-NEXT: [[TMP71:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5), i32, i32, i32 } [[TMP70]], i32 undef, 4 +; CHECK-NEXT: [[TMP72:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5), i32, i32, i32 } [[TMP71]], i32 [[TMP51]], 5 +; CHECK-NEXT: [[TMP73:%.*]] = extractvalue { <3 x i32>, i32, ptr addrspace(5), i32, i32, i32 } [[TMP72]], 1 +; CHECK-NEXT: [[TMP74:%.*]] = call i32 @llvm.amdgcn.set.inactive.chain.arg.i32(i32 [[TMP73]], i32 [[VCR]]) +; CHECK-NEXT: [[TMP75:%.*]] = icmp ne i32 [[TMP74]], 0 +; CHECK-NEXT: [[TMP76:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP75]]) +; CHECK-NEXT: [[TMP77:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP76]], i1 true) +; CHECK-NEXT: [[TMP78:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP74]], i32 [[TMP77]]) +; CHECK-NEXT: [[TMP79:%.*]] = icmp eq i32 [[TMP74]], [[TMP78]] +; CHECK-NEXT: [[TMP80:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP79]]) +; CHECK-NEXT: [[TMP81:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP78]]) +; CHECK-NEXT: [[TMP82:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP80]]) +; CHECK-NEXT: [[TMP83:%.*]] = and i32 [[TMP81]], -64 +; CHECK-NEXT: [[TMP84:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP83]], i64 0 +; CHECK-NEXT: [[TMP85:%.*]] = bitcast <2 x i32> 
[[TMP84]] to i64 +; CHECK-NEXT: [[TMP86:%.*]] = inttoptr i64 [[TMP85]] to ptr +; CHECK-NEXT: [[TMP87:%.*]] = ptrtoint ptr addrspace(4) [[NUMWORKGROUPSPTR]] to i64 +; CHECK-NEXT: [[TMP88:%.*]] = bitcast i64 [[TMP87]] to <2 x i32> +; CHECK-NEXT: [[TMP89:%.*]] = extractelement <2 x i32> [[TMP88]], i64 0 +; CHECK-NEXT: [[TMP90:%.*]] = extractelement <2 x i32> [[TMP88]], i64 1 +; CHECK-NEXT: [[TMP91:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 0 +; CHECK-NEXT: [[TMP92:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 1 +; CHECK-NEXT: [[TMP93:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 2 +; CHECK-NEXT: [[TMP94:%.*]] = insertelement <20 x i32> poison, i32 [[GLOBALTABLE]], i64 0 +; CHECK-NEXT: [[TMP95:%.*]] = insertelement <20 x i32> [[TMP94]], i32 [[TMP89]], i64 1 +; CHECK-NEXT: [[TMP96:%.*]] = insertelement <20 x i32> [[TMP95]], i32 [[TMP90]], i64 2 +; CHECK-NEXT: [[TMP97:%.*]] = insertelement <20 x i32> [[TMP96]], i32 [[USERDATA0]], i64 3 +; CHECK-NEXT: [[TMP98:%.*]] = insertelement <20 x i32> [[TMP97]], i32 [[USERDATA1]], i64 4 +; CHECK-NEXT: [[TMP99:%.*]] = insertelement <20 x i32> [[TMP98]], i32 [[USERDATA2]], i64 5 +; CHECK-NEXT: [[TMP100:%.*]] = insertelement <20 x i32> [[TMP99]], i32 [[USERDATA3]], i64 6 +; CHECK-NEXT: [[TMP101:%.*]] = insertelement <20 x i32> [[TMP100]], i32 [[USERDATA4]], i64 7 +; CHECK-NEXT: [[TMP102:%.*]] = insertelement <20 x i32> [[TMP101]], i32 [[USERDATA5]], i64 8 +; CHECK-NEXT: [[TMP103:%.*]] = insertelement <20 x i32> [[TMP102]], i32 [[PAD6]], i64 9 +; CHECK-NEXT: [[TMP104:%.*]] = insertelement <20 x i32> [[TMP103]], i32 [[PAD7]], i64 10 +; CHECK-NEXT: [[TMP105:%.*]] = insertelement <20 x i32> [[TMP104]], i32 [[PAD8]], i64 11 +; CHECK-NEXT: [[TMP106:%.*]] = insertelement <20 x i32> [[TMP105]], i32 [[PAD9]], i64 12 +; CHECK-NEXT: [[TMP107:%.*]] = insertelement <20 x i32> [[TMP106]], i32 [[PAD10]], i64 13 +; CHECK-NEXT: [[TMP108:%.*]] = insertelement <20 x i32> [[TMP107]], i32 [[PAD11]], i64 14 +; CHECK-NEXT: 
[[TMP109:%.*]] = insertelement <20 x i32> [[TMP108]], i32 [[SPILLTABLE]], i64 15 +; CHECK-NEXT: [[TMP110:%.*]] = insertelement <20 x i32> [[TMP109]], i32 [[TMP91]], i64 16 +; CHECK-NEXT: [[TMP111:%.*]] = insertelement <20 x i32> [[TMP110]], i32 [[TMP92]], i64 17 +; CHECK-NEXT: [[TMP112:%.*]] = insertelement <20 x i32> [[TMP111]], i32 [[TMP93]], i64 18 +; CHECK-NEXT: [[TMP113:%.*]] = insertelement <20 x i32> [[TMP112]], i32 [[MULTIDISPATCHINFO]], i64 19 +; CHECK-NEXT: call void (ptr, i32, <20 x i32>, { <3 x i32>, i32, ptr addrspace(5), i32, i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_v3i32i32p5i32i32i32s(ptr inreg [[TMP86]], i32 inreg [[TMP82]], <20 x i32> inreg [[TMP113]], { <3 x i32>, i32, ptr addrspace(5), i32, i32, i32 } [[TMP72]], i32 0) ; CHECK-NEXT: unreachable ; ; @@ -284,71 +287,75 @@ attributes #7 = { nounwind willreturn memory(inaccessiblemem: read) } ; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(5) [[TMP2]], align 4 ; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], -96 ; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[TMP12]], 64 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr addrspace(5) null, i32 [[TMP13]] -; CHECK-NEXT: [[DOTRELOAD6:%.*]] = load i32, ptr addrspace(5) [[TMP14]], align 4 -; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP12]], 32 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr addrspace(5) null, i32 [[TMP15]] -; CHECK-NEXT: [[DOTRELOAD3:%.*]] = load ptr addrspace(7), ptr addrspace(5) [[TMP16]], align 32 -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr addrspace(5) null, i32 [[TMP12]] -; CHECK-NEXT: [[DOTRELOAD:%.*]] = load ptr addrspace(7), ptr addrspace(5) [[TMP17]], align 32 -; CHECK-NEXT: [[TMP18:%.*]] = mul i32 [[USERDATA5]], 96 -; CHECK-NEXT: [[TMP19:%.*]] = add i32 [[TMP12]], [[TMP18]] -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr addrspace(5) null, i32 [[TMP19]] -; CHECK-NEXT: [[DUMMY_RELOAD:%.*]] = load ptr addrspace(7), ptr addrspace(5) [[TMP20]], align 32 -; CHECK-NEXT: [[TMP21:%.*]] = load 
volatile i32, ptr addrspace(7) [[DOTRELOAD3]], align 4 -; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i32 [[DOTRELOAD6]], [[TMP21]] -; CHECK-NEXT: [[TMP23:%.*]] = zext i1 [[TMP22]] to i32 -; CHECK-NEXT: store i32 [[TMP23]], ptr addrspace(7) [[DOTRELOAD]], align 4 +; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i32 [[TMP13]] to ptr addrspace(5) +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr addrspace(5) [[TMP14]], i32 0 +; CHECK-NEXT: [[DOTRELOAD6:%.*]] = load i32, ptr addrspace(5) [[TMP15]], align 4 +; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[TMP12]], 32 +; CHECK-NEXT: [[TMP17:%.*]] = inttoptr i32 [[TMP16]] to ptr addrspace(5) +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr addrspace(5) [[TMP17]], i32 0 +; CHECK-NEXT: [[DOTRELOAD3:%.*]] = load ptr addrspace(7), ptr addrspace(5) [[TMP18]], align 32 +; CHECK-NEXT: [[TMP19:%.*]] = inttoptr i32 [[TMP12]] to ptr addrspace(5) +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr addrspace(5) [[TMP19]], i32 0 +; CHECK-NEXT: [[DOTRELOAD:%.*]] = load ptr addrspace(7), ptr addrspace(5) [[TMP20]], align 32 +; CHECK-NEXT: [[TMP21:%.*]] = mul i32 [[USERDATA5]], 96 +; CHECK-NEXT: [[TMP22:%.*]] = add i32 [[TMP12]], [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP22]] to ptr addrspace(5) +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr addrspace(5) [[TMP23]], i32 0 +; CHECK-NEXT: [[DUMMY_RELOAD:%.*]] = load ptr addrspace(7), ptr addrspace(5) [[TMP24]], align 32 +; CHECK-NEXT: [[TMP25:%.*]] = load volatile i32, ptr addrspace(7) [[DOTRELOAD3]], align 4 +; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i32 [[DOTRELOAD6]], [[TMP25]] +; CHECK-NEXT: [[TMP27:%.*]] = zext i1 [[TMP26]] to i32 +; CHECK-NEXT: store i32 [[TMP27]], ptr addrspace(7) [[DOTRELOAD]], align 4 ; CHECK-NEXT: br label [[TAIL_BLOCK:%.*]] ; CHECK: tail.block: -; CHECK-NEXT: [[TMP24:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5) } poison, <3 x i32> [[LOCALINVOCATIONID]], 0 -; CHECK-NEXT: [[TMP25:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5) } [[TMP24]], i32 
0, 1 -; CHECK-NEXT: [[TMP26:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5) } [[TMP25]], ptr addrspace(5) poison, 2 -; CHECK-NEXT: [[TMP27:%.*]] = extractvalue { <3 x i32>, i32, ptr addrspace(5) } [[TMP26]], 1 -; CHECK-NEXT: [[TMP28:%.*]] = call i32 @llvm.amdgcn.set.inactive.chain.arg.i32(i32 [[TMP27]], i32 [[VCR]]) -; CHECK-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -; CHECK-NEXT: [[TMP30:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP29]]) -; CHECK-NEXT: [[TMP31:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP30]], i1 true) -; CHECK-NEXT: [[TMP32:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP28]], i32 [[TMP31]]) -; CHECK-NEXT: [[TMP33:%.*]] = icmp eq i32 [[TMP28]], [[TMP32]] +; CHECK-NEXT: [[TMP28:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5) } poison, <3 x i32> [[LOCALINVOCATIONID]], 0 +; CHECK-NEXT: [[TMP29:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5) } [[TMP28]], i32 0, 1 +; CHECK-NEXT: [[TMP30:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5) } [[TMP29]], ptr addrspace(5) poison, 2 +; CHECK-NEXT: [[TMP31:%.*]] = extractvalue { <3 x i32>, i32, ptr addrspace(5) } [[TMP30]], 1 +; CHECK-NEXT: [[TMP32:%.*]] = call i32 @llvm.amdgcn.set.inactive.chain.arg.i32(i32 [[TMP31]], i32 [[VCR]]) +; CHECK-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 ; CHECK-NEXT: [[TMP34:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP33]]) -; CHECK-NEXT: [[TMP35:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP32]]) -; CHECK-NEXT: [[TMP36:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP34]]) -; CHECK-NEXT: [[TMP37:%.*]] = icmp eq i32 [[TMP35]], 0 -; CHECK-NEXT: br i1 [[TMP37]], label [[RET_BLOCK:%.*]], label [[CHAIN_BLOCK:%.*]] +; CHECK-NEXT: [[TMP35:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP34]], i1 true) +; CHECK-NEXT: [[TMP36:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP32]], i32 [[TMP35]]) +; CHECK-NEXT: [[TMP37:%.*]] = icmp eq i32 [[TMP32]], [[TMP36]] +; CHECK-NEXT: [[TMP38:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP37]]) +; CHECK-NEXT: 
[[TMP39:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP36]]) +; CHECK-NEXT: [[TMP40:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP38]]) +; CHECK-NEXT: [[TMP41:%.*]] = icmp eq i32 [[TMP39]], 0 +; CHECK-NEXT: br i1 [[TMP41]], label [[RET_BLOCK:%.*]], label [[CHAIN_BLOCK:%.*]] ; CHECK: chain.block: -; CHECK-NEXT: [[TMP38:%.*]] = and i32 [[TMP35]], -64 -; CHECK-NEXT: [[TMP39:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP38]], i64 0 -; CHECK-NEXT: [[TMP40:%.*]] = bitcast <2 x i32> [[TMP39]] to i64 -; CHECK-NEXT: [[TMP41:%.*]] = inttoptr i64 [[TMP40]] to ptr -; CHECK-NEXT: [[TMP42:%.*]] = ptrtoint ptr addrspace(4) [[NUMWORKGROUPSPTR]] to i64 -; CHECK-NEXT: [[TMP43:%.*]] = bitcast i64 [[TMP42]] to <2 x i32> -; CHECK-NEXT: [[TMP44:%.*]] = extractelement <2 x i32> [[TMP43]], i64 0 -; CHECK-NEXT: [[TMP45:%.*]] = extractelement <2 x i32> [[TMP43]], i64 1 -; CHECK-NEXT: [[TMP46:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 0 -; CHECK-NEXT: [[TMP47:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 1 -; CHECK-NEXT: [[TMP48:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 2 -; CHECK-NEXT: [[TMP49:%.*]] = insertelement <20 x i32> poison, i32 [[GLOBALTABLE]], i64 0 -; CHECK-NEXT: [[TMP50:%.*]] = insertelement <20 x i32> [[TMP49]], i32 [[TMP44]], i64 1 -; CHECK-NEXT: [[TMP51:%.*]] = insertelement <20 x i32> [[TMP50]], i32 [[TMP45]], i64 2 -; CHECK-NEXT: [[TMP52:%.*]] = insertelement <20 x i32> [[TMP51]], i32 [[USERDATA0]], i64 3 -; CHECK-NEXT: [[TMP53:%.*]] = insertelement <20 x i32> [[TMP52]], i32 [[USERDATA1]], i64 4 -; CHECK-NEXT: [[TMP54:%.*]] = insertelement <20 x i32> [[TMP53]], i32 [[USERDATA2]], i64 5 -; CHECK-NEXT: [[TMP55:%.*]] = insertelement <20 x i32> [[TMP54]], i32 [[USERDATA3]], i64 6 -; CHECK-NEXT: [[TMP56:%.*]] = insertelement <20 x i32> [[TMP55]], i32 [[USERDATA4]], i64 7 -; CHECK-NEXT: [[TMP57:%.*]] = insertelement <20 x i32> [[TMP56]], i32 [[USERDATA5]], i64 8 -; CHECK-NEXT: [[TMP58:%.*]] = insertelement <20 x i32> [[TMP57]], i32 [[PAD6]], 
i64 9 -; CHECK-NEXT: [[TMP59:%.*]] = insertelement <20 x i32> [[TMP58]], i32 [[PAD7]], i64 10 -; CHECK-NEXT: [[TMP60:%.*]] = insertelement <20 x i32> [[TMP59]], i32 [[PAD8]], i64 11 -; CHECK-NEXT: [[TMP61:%.*]] = insertelement <20 x i32> [[TMP60]], i32 [[PAD9]], i64 12 -; CHECK-NEXT: [[TMP62:%.*]] = insertelement <20 x i32> [[TMP61]], i32 [[PAD10]], i64 13 -; CHECK-NEXT: [[TMP63:%.*]] = insertelement <20 x i32> [[TMP62]], i32 [[PAD11]], i64 14 -; CHECK-NEXT: [[TMP64:%.*]] = insertelement <20 x i32> [[TMP63]], i32 [[SPILLTABLE]], i64 15 -; CHECK-NEXT: [[TMP65:%.*]] = insertelement <20 x i32> [[TMP64]], i32 [[TMP46]], i64 16 -; CHECK-NEXT: [[TMP66:%.*]] = insertelement <20 x i32> [[TMP65]], i32 [[TMP47]], i64 17 -; CHECK-NEXT: [[TMP67:%.*]] = insertelement <20 x i32> [[TMP66]], i32 [[TMP48]], i64 18 -; CHECK-NEXT: [[TMP68:%.*]] = insertelement <20 x i32> [[TMP67]], i32 [[MULTIDISPATCHINFO]], i64 19 -; CHECK-NEXT: call void (ptr, i32, <20 x i32>, { <3 x i32>, i32, ptr addrspace(5) }, i32, ...) 
@llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_v3i32i32p5s(ptr inreg [[TMP41]], i32 inreg [[TMP36]], <20 x i32> inreg [[TMP68]], { <3 x i32>, i32, ptr addrspace(5) } [[TMP26]], i32 0) +; CHECK-NEXT: [[TMP42:%.*]] = and i32 [[TMP39]], -64 +; CHECK-NEXT: [[TMP43:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP42]], i64 0 +; CHECK-NEXT: [[TMP44:%.*]] = bitcast <2 x i32> [[TMP43]] to i64 +; CHECK-NEXT: [[TMP45:%.*]] = inttoptr i64 [[TMP44]] to ptr +; CHECK-NEXT: [[TMP46:%.*]] = ptrtoint ptr addrspace(4) [[NUMWORKGROUPSPTR]] to i64 +; CHECK-NEXT: [[TMP47:%.*]] = bitcast i64 [[TMP46]] to <2 x i32> +; CHECK-NEXT: [[TMP48:%.*]] = extractelement <2 x i32> [[TMP47]], i64 0 +; CHECK-NEXT: [[TMP49:%.*]] = extractelement <2 x i32> [[TMP47]], i64 1 +; CHECK-NEXT: [[TMP50:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 0 +; CHECK-NEXT: [[TMP51:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 1 +; CHECK-NEXT: [[TMP52:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 2 +; CHECK-NEXT: [[TMP53:%.*]] = insertelement <20 x i32> poison, i32 [[GLOBALTABLE]], i64 0 +; CHECK-NEXT: [[TMP54:%.*]] = insertelement <20 x i32> [[TMP53]], i32 [[TMP48]], i64 1 +; CHECK-NEXT: [[TMP55:%.*]] = insertelement <20 x i32> [[TMP54]], i32 [[TMP49]], i64 2 +; CHECK-NEXT: [[TMP56:%.*]] = insertelement <20 x i32> [[TMP55]], i32 [[USERDATA0]], i64 3 +; CHECK-NEXT: [[TMP57:%.*]] = insertelement <20 x i32> [[TMP56]], i32 [[USERDATA1]], i64 4 +; CHECK-NEXT: [[TMP58:%.*]] = insertelement <20 x i32> [[TMP57]], i32 [[USERDATA2]], i64 5 +; CHECK-NEXT: [[TMP59:%.*]] = insertelement <20 x i32> [[TMP58]], i32 [[USERDATA3]], i64 6 +; CHECK-NEXT: [[TMP60:%.*]] = insertelement <20 x i32> [[TMP59]], i32 [[USERDATA4]], i64 7 +; CHECK-NEXT: [[TMP61:%.*]] = insertelement <20 x i32> [[TMP60]], i32 [[USERDATA5]], i64 8 +; CHECK-NEXT: [[TMP62:%.*]] = insertelement <20 x i32> [[TMP61]], i32 [[PAD6]], i64 9 +; CHECK-NEXT: [[TMP63:%.*]] = insertelement <20 x i32> [[TMP62]], i32 [[PAD7]], i64 10 +; CHECK-NEXT: [[TMP64:%.*]] 
= insertelement <20 x i32> [[TMP63]], i32 [[PAD8]], i64 11 +; CHECK-NEXT: [[TMP65:%.*]] = insertelement <20 x i32> [[TMP64]], i32 [[PAD9]], i64 12 +; CHECK-NEXT: [[TMP66:%.*]] = insertelement <20 x i32> [[TMP65]], i32 [[PAD10]], i64 13 +; CHECK-NEXT: [[TMP67:%.*]] = insertelement <20 x i32> [[TMP66]], i32 [[PAD11]], i64 14 +; CHECK-NEXT: [[TMP68:%.*]] = insertelement <20 x i32> [[TMP67]], i32 [[SPILLTABLE]], i64 15 +; CHECK-NEXT: [[TMP69:%.*]] = insertelement <20 x i32> [[TMP68]], i32 [[TMP50]], i64 16 +; CHECK-NEXT: [[TMP70:%.*]] = insertelement <20 x i32> [[TMP69]], i32 [[TMP51]], i64 17 +; CHECK-NEXT: [[TMP71:%.*]] = insertelement <20 x i32> [[TMP70]], i32 [[TMP52]], i64 18 +; CHECK-NEXT: [[TMP72:%.*]] = insertelement <20 x i32> [[TMP71]], i32 [[MULTIDISPATCHINFO]], i64 19 +; CHECK-NEXT: call void (ptr, i32, <20 x i32>, { <3 x i32>, i32, ptr addrspace(5) }, i32, ...) @llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_v3i32i32p5s(ptr inreg [[TMP45]], i32 inreg [[TMP40]], <20 x i32> inreg [[TMP72]], { <3 x i32>, i32, ptr addrspace(5) } [[TMP30]], i32 0) ; CHECK-NEXT: unreachable ; CHECK: ret.block: ; CHECK-NEXT: ret void diff --git a/lgc/test/Transforms/CpsLowering/cps-stack-lowering.lgc b/lgc/test/Transforms/CpsLowering/cps-stack-lowering.lgc index ed1fcb50dc..89db5c7b5f 100644 --- a/lgc/test/Transforms/CpsLowering/cps-stack-lowering.lgc +++ b/lgc/test/Transforms/CpsLowering/cps-stack-lowering.lgc @@ -111,68 +111,71 @@ define void @test.nested.gep({} %unused) !lgc.cps !{i32 1} !lgc.shaderstage !{i3 ; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(5) [[TMP1]], align 4 ; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], 12 ; CHECK-NEXT: store i32 [[TMP11]], ptr addrspace(5) [[TMP1]], align 4 -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr addrspace(5) null, i32 [[TMP10]] -; CHECK-NEXT: store i32 333, ptr addrspace(5) [[TMP12]], align 4 -; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[TMP10]], 4 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr addrspace(5) null, i32 
[[TMP13]] -; CHECK-NEXT: store i32 111, ptr addrspace(5) [[TMP14]], align 4 -; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP10]], 9 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr addrspace(5) null, i32 [[TMP15]] -; CHECK-NEXT: store i8 99, ptr addrspace(5) [[TMP16]], align 1 -; CHECK-NEXT: [[STATE:%.*]] = insertvalue { i32 } poison, i32 [[TMP15]], 0 -; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(5) [[TMP1]], align 4 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP10]] to ptr addrspace(5) +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr addrspace(5) [[TMP12]], i32 0 +; CHECK-NEXT: store i32 333, ptr addrspace(5) [[TMP13]], align 4 +; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[TMP10]], 4 +; CHECK-NEXT: [[TMP15:%.*]] = inttoptr i32 [[TMP14]] to ptr addrspace(5) +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr addrspace(5) [[TMP15]], i32 0 +; CHECK-NEXT: store i32 111, ptr addrspace(5) [[TMP16]], align 4 +; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[TMP10]], 9 ; CHECK-NEXT: [[TMP18:%.*]] = inttoptr i32 [[TMP17]] to ptr addrspace(5) -; CHECK-NEXT: store { i32 } [[STATE]], ptr addrspace(5) [[TMP18]], align 4 -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr addrspace(5) [[TMP18]], i32 4 +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr addrspace(5) [[TMP18]], i32 0 +; CHECK-NEXT: store i8 99, ptr addrspace(5) [[TMP19]], align 1 +; CHECK-NEXT: [[STATE:%.*]] = insertvalue { i32 } poison, i32 [[TMP17]], 0 +; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(5) [[TMP1]], align 4 +; CHECK-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP20]] to ptr addrspace(5) +; CHECK-NEXT: store { i32 } [[STATE]], ptr addrspace(5) [[TMP21]], align 4 +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr addrspace(5) [[TMP21]], i32 4 ; CHECK-NEXT: br label [[TAIL_BLOCK:%.*]] ; CHECK: tail.block: -; CHECK-NEXT: [[TMP20:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5), i32, i32 } poison, <3 x i32> [[LOCALINVOCATIONID]], 0 -; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { <3 x i32>, 
i32, ptr addrspace(5), i32, i32 } [[TMP20]], i32 add (i32 ptrtoint (ptr @test.1 to i32), i32 1), 1 -; CHECK-NEXT: [[TMP22:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5), i32, i32 } [[TMP21]], ptr addrspace(5) [[TMP19]], 2 -; CHECK-NEXT: [[TMP23:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5), i32, i32 } [[TMP22]], i32 [[TMP15]], 3 -; CHECK-NEXT: [[TMP24:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5), i32, i32 } [[TMP23]], i32 [[TMP13]], 4 -; CHECK-NEXT: [[TMP25:%.*]] = extractvalue { <3 x i32>, i32, ptr addrspace(5), i32, i32 } [[TMP24]], 1 -; CHECK-NEXT: [[TMP26:%.*]] = call i32 @llvm.amdgcn.set.inactive.chain.arg.i32(i32 [[TMP25]], i32 [[VCR]]) -; CHECK-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -; CHECK-NEXT: [[TMP28:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP27]]) -; CHECK-NEXT: [[TMP29:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP28]], i1 true) -; CHECK-NEXT: [[TMP30:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP26]], i32 [[TMP29]]) -; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i32 [[TMP26]], [[TMP30]] -; CHECK-NEXT: [[TMP32:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP31]]) -; CHECK-NEXT: [[TMP33:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP30]]) -; CHECK-NEXT: [[TMP34:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP32]]) -; CHECK-NEXT: [[TMP35:%.*]] = and i32 [[TMP33]], -64 -; CHECK-NEXT: [[TMP36:%.*]] = insertelement <2 x i32> [[TMP8]], i32 [[TMP35]], i64 0 -; CHECK-NEXT: [[TMP37:%.*]] = bitcast <2 x i32> [[TMP36]] to i64 -; CHECK-NEXT: [[TMP38:%.*]] = inttoptr i64 [[TMP37]] to ptr -; CHECK-NEXT: [[TMP39:%.*]] = ptrtoint ptr addrspace(4) [[NUMWORKGROUPSPTR]] to i64 -; CHECK-NEXT: [[TMP40:%.*]] = bitcast i64 [[TMP39]] to <2 x i32> -; CHECK-NEXT: [[TMP41:%.*]] = extractelement <2 x i32> [[TMP40]], i64 0 -; CHECK-NEXT: [[TMP42:%.*]] = extractelement <2 x i32> [[TMP40]], i64 1 -; CHECK-NEXT: [[TMP43:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 0 -; CHECK-NEXT: [[TMP44:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 1 
-; CHECK-NEXT: [[TMP45:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 2 -; CHECK-NEXT: [[TMP46:%.*]] = insertelement <20 x i32> poison, i32 [[GLOBALTABLE]], i64 0 -; CHECK-NEXT: [[TMP47:%.*]] = insertelement <20 x i32> [[TMP46]], i32 [[TMP41]], i64 1 -; CHECK-NEXT: [[TMP48:%.*]] = insertelement <20 x i32> [[TMP47]], i32 [[TMP42]], i64 2 -; CHECK-NEXT: [[TMP49:%.*]] = insertelement <20 x i32> [[TMP48]], i32 [[PAD0]], i64 3 -; CHECK-NEXT: [[TMP50:%.*]] = insertelement <20 x i32> [[TMP49]], i32 [[PAD1]], i64 4 -; CHECK-NEXT: [[TMP51:%.*]] = insertelement <20 x i32> [[TMP50]], i32 [[PAD2]], i64 5 -; CHECK-NEXT: [[TMP52:%.*]] = insertelement <20 x i32> [[TMP51]], i32 [[PAD3]], i64 6 -; CHECK-NEXT: [[TMP53:%.*]] = insertelement <20 x i32> [[TMP52]], i32 [[PAD4]], i64 7 -; CHECK-NEXT: [[TMP54:%.*]] = insertelement <20 x i32> [[TMP53]], i32 [[PAD5]], i64 8 -; CHECK-NEXT: [[TMP55:%.*]] = insertelement <20 x i32> [[TMP54]], i32 [[PAD6]], i64 9 -; CHECK-NEXT: [[TMP56:%.*]] = insertelement <20 x i32> [[TMP55]], i32 [[PAD7]], i64 10 -; CHECK-NEXT: [[TMP57:%.*]] = insertelement <20 x i32> [[TMP56]], i32 [[PAD8]], i64 11 -; CHECK-NEXT: [[TMP58:%.*]] = insertelement <20 x i32> [[TMP57]], i32 [[PAD9]], i64 12 -; CHECK-NEXT: [[TMP59:%.*]] = insertelement <20 x i32> [[TMP58]], i32 [[PAD10]], i64 13 -; CHECK-NEXT: [[TMP60:%.*]] = insertelement <20 x i32> [[TMP59]], i32 [[PAD11]], i64 14 -; CHECK-NEXT: [[TMP61:%.*]] = insertelement <20 x i32> [[TMP60]], i32 [[SPILLTABLE]], i64 15 -; CHECK-NEXT: [[TMP62:%.*]] = insertelement <20 x i32> [[TMP61]], i32 [[TMP43]], i64 16 -; CHECK-NEXT: [[TMP63:%.*]] = insertelement <20 x i32> [[TMP62]], i32 [[TMP44]], i64 17 -; CHECK-NEXT: [[TMP64:%.*]] = insertelement <20 x i32> [[TMP63]], i32 [[TMP45]], i64 18 -; CHECK-NEXT: [[TMP65:%.*]] = insertelement <20 x i32> [[TMP64]], i32 [[MULTIDISPATCHINFO]], i64 19 -; CHECK-NEXT: call void (ptr, i32, <20 x i32>, { <3 x i32>, i32, ptr addrspace(5), i32, i32 }, i32, ...) 
@llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_v3i32i32p5i32i32s(ptr inreg [[TMP38]], i32 inreg [[TMP34]], <20 x i32> inreg [[TMP65]], { <3 x i32>, i32, ptr addrspace(5), i32, i32 } [[TMP24]], i32 0) +; CHECK-NEXT: [[TMP23:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5), i32, i32 } poison, <3 x i32> [[LOCALINVOCATIONID]], 0 +; CHECK-NEXT: [[TMP24:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5), i32, i32 } [[TMP23]], i32 add (i32 ptrtoint (ptr @test.1 to i32), i32 1), 1 +; CHECK-NEXT: [[TMP25:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5), i32, i32 } [[TMP24]], ptr addrspace(5) [[TMP22]], 2 +; CHECK-NEXT: [[TMP26:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5), i32, i32 } [[TMP25]], i32 [[TMP17]], 3 +; CHECK-NEXT: [[TMP27:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5), i32, i32 } [[TMP26]], i32 [[TMP14]], 4 +; CHECK-NEXT: [[TMP28:%.*]] = extractvalue { <3 x i32>, i32, ptr addrspace(5), i32, i32 } [[TMP27]], 1 +; CHECK-NEXT: [[TMP29:%.*]] = call i32 @llvm.amdgcn.set.inactive.chain.arg.i32(i32 [[TMP28]], i32 [[VCR]]) +; CHECK-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +; CHECK-NEXT: [[TMP31:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP30]]) +; CHECK-NEXT: [[TMP32:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP31]], i1 true) +; CHECK-NEXT: [[TMP33:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP29]], i32 [[TMP32]]) +; CHECK-NEXT: [[TMP34:%.*]] = icmp eq i32 [[TMP29]], [[TMP33]] +; CHECK-NEXT: [[TMP35:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP34]]) +; CHECK-NEXT: [[TMP36:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP33]]) +; CHECK-NEXT: [[TMP37:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP35]]) +; CHECK-NEXT: [[TMP38:%.*]] = and i32 [[TMP36]], -64 +; CHECK-NEXT: [[TMP39:%.*]] = insertelement <2 x i32> [[TMP8]], i32 [[TMP38]], i64 0 +; CHECK-NEXT: [[TMP40:%.*]] = bitcast <2 x i32> [[TMP39]] to i64 +; CHECK-NEXT: [[TMP41:%.*]] = inttoptr i64 [[TMP40]] to ptr +; CHECK-NEXT: [[TMP42:%.*]] = ptrtoint ptr addrspace(4) 
[[NUMWORKGROUPSPTR]] to i64 +; CHECK-NEXT: [[TMP43:%.*]] = bitcast i64 [[TMP42]] to <2 x i32> +; CHECK-NEXT: [[TMP44:%.*]] = extractelement <2 x i32> [[TMP43]], i64 0 +; CHECK-NEXT: [[TMP45:%.*]] = extractelement <2 x i32> [[TMP43]], i64 1 +; CHECK-NEXT: [[TMP46:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 0 +; CHECK-NEXT: [[TMP47:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 1 +; CHECK-NEXT: [[TMP48:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 2 +; CHECK-NEXT: [[TMP49:%.*]] = insertelement <20 x i32> poison, i32 [[GLOBALTABLE]], i64 0 +; CHECK-NEXT: [[TMP50:%.*]] = insertelement <20 x i32> [[TMP49]], i32 [[TMP44]], i64 1 +; CHECK-NEXT: [[TMP51:%.*]] = insertelement <20 x i32> [[TMP50]], i32 [[TMP45]], i64 2 +; CHECK-NEXT: [[TMP52:%.*]] = insertelement <20 x i32> [[TMP51]], i32 [[PAD0]], i64 3 +; CHECK-NEXT: [[TMP53:%.*]] = insertelement <20 x i32> [[TMP52]], i32 [[PAD1]], i64 4 +; CHECK-NEXT: [[TMP54:%.*]] = insertelement <20 x i32> [[TMP53]], i32 [[PAD2]], i64 5 +; CHECK-NEXT: [[TMP55:%.*]] = insertelement <20 x i32> [[TMP54]], i32 [[PAD3]], i64 6 +; CHECK-NEXT: [[TMP56:%.*]] = insertelement <20 x i32> [[TMP55]], i32 [[PAD4]], i64 7 +; CHECK-NEXT: [[TMP57:%.*]] = insertelement <20 x i32> [[TMP56]], i32 [[PAD5]], i64 8 +; CHECK-NEXT: [[TMP58:%.*]] = insertelement <20 x i32> [[TMP57]], i32 [[PAD6]], i64 9 +; CHECK-NEXT: [[TMP59:%.*]] = insertelement <20 x i32> [[TMP58]], i32 [[PAD7]], i64 10 +; CHECK-NEXT: [[TMP60:%.*]] = insertelement <20 x i32> [[TMP59]], i32 [[PAD8]], i64 11 +; CHECK-NEXT: [[TMP61:%.*]] = insertelement <20 x i32> [[TMP60]], i32 [[PAD9]], i64 12 +; CHECK-NEXT: [[TMP62:%.*]] = insertelement <20 x i32> [[TMP61]], i32 [[PAD10]], i64 13 +; CHECK-NEXT: [[TMP63:%.*]] = insertelement <20 x i32> [[TMP62]], i32 [[PAD11]], i64 14 +; CHECK-NEXT: [[TMP64:%.*]] = insertelement <20 x i32> [[TMP63]], i32 [[SPILLTABLE]], i64 15 +; CHECK-NEXT: [[TMP65:%.*]] = insertelement <20 x i32> [[TMP64]], i32 [[TMP46]], i64 16 +; CHECK-NEXT: 
[[TMP66:%.*]] = insertelement <20 x i32> [[TMP65]], i32 [[TMP47]], i64 17 +; CHECK-NEXT: [[TMP67:%.*]] = insertelement <20 x i32> [[TMP66]], i32 [[TMP48]], i64 18 +; CHECK-NEXT: [[TMP68:%.*]] = insertelement <20 x i32> [[TMP67]], i32 [[MULTIDISPATCHINFO]], i64 19 +; CHECK-NEXT: call void (ptr, i32, <20 x i32>, { <3 x i32>, i32, ptr addrspace(5), i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_v3i32i32p5i32i32s(ptr inreg [[TMP41]], i32 inreg [[TMP37]], <20 x i32> inreg [[TMP68]], { <3 x i32>, i32, ptr addrspace(5), i32, i32 } [[TMP27]], i32 0) ; CHECK-NEXT: unreachable ; ; @@ -188,59 +191,61 @@ define void @test.nested.gep({} %unused) !lgc.cps !{i32 1} !lgc.shaderstage !{i3 ; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64 [[TMP7]] to <2 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr addrspace(5) [[VSP]] to i32 ; CHECK-NEXT: store i32 [[TMP9]], ptr addrspace(5) [[TMP1]], align 4 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr addrspace(5) null, i32 [[Q1]] -; CHECK-NEXT: [[N111:%.*]] = load i32, ptr addrspace(5) [[TMP10]], align 4 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr addrspace(5) null, i32 [[P2]] -; CHECK-NEXT: [[N99:%.*]] = load i8, ptr addrspace(5) [[TMP11]], align 1 -; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(5) [[TMP1]], align 4 -; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i32 [[TMP12]] to ptr addrspace(5) +; CHECK-NEXT: [[TMP10:%.*]] = inttoptr i32 [[Q1]] to ptr addrspace(5) +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr addrspace(5) [[TMP10]], i32 0 +; CHECK-NEXT: [[N111:%.*]] = load i32, ptr addrspace(5) [[TMP11]], align 4 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i32 [[P2]] to ptr addrspace(5) +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr addrspace(5) [[TMP12]], i32 0 +; CHECK-NEXT: [[N99:%.*]] = load i8, ptr addrspace(5) [[TMP13]], align 1 +; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(5) [[TMP1]], align 4 +; CHECK-NEXT: [[TMP15:%.*]] = inttoptr i32 [[TMP14]] to ptr addrspace(5) ; CHECK-NEXT: br label 
[[TAIL_BLOCK:%.*]] ; CHECK: tail.block: -; CHECK-NEXT: [[TMP14:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5) } poison, <3 x i32> [[LOCALINVOCATIONID]], 0 -; CHECK-NEXT: [[TMP15:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5) } [[TMP14]], i32 add (i32 ptrtoint (ptr @test.2 to i32), i32 1), 1 -; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5) } [[TMP15]], ptr addrspace(5) [[TMP13]], 2 -; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <3 x i32>, i32, ptr addrspace(5) } [[TMP16]], 1 -; CHECK-NEXT: [[TMP18:%.*]] = call i32 @llvm.amdgcn.set.inactive.chain.arg.i32(i32 [[TMP17]], i32 [[VCR]]) -; CHECK-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -; CHECK-NEXT: [[TMP20:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP19]]) -; CHECK-NEXT: [[TMP21:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP20]], i1 true) -; CHECK-NEXT: [[TMP22:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP18]], i32 [[TMP21]]) -; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i32 [[TMP18]], [[TMP22]] -; CHECK-NEXT: [[TMP24:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP23]]) -; CHECK-NEXT: [[TMP25:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP22]]) -; CHECK-NEXT: [[TMP26:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP24]]) -; CHECK-NEXT: [[TMP27:%.*]] = and i32 [[TMP25]], -64 -; CHECK-NEXT: [[TMP28:%.*]] = insertelement <2 x i32> [[TMP8]], i32 [[TMP27]], i64 0 -; CHECK-NEXT: [[TMP29:%.*]] = bitcast <2 x i32> [[TMP28]] to i64 -; CHECK-NEXT: [[TMP30:%.*]] = inttoptr i64 [[TMP29]] to ptr -; CHECK-NEXT: [[TMP31:%.*]] = ptrtoint ptr addrspace(4) [[NUMWORKGROUPSPTR]] to i64 -; CHECK-NEXT: [[TMP32:%.*]] = bitcast i64 [[TMP31]] to <2 x i32> -; CHECK-NEXT: [[TMP33:%.*]] = extractelement <2 x i32> [[TMP32]], i64 0 -; CHECK-NEXT: [[TMP34:%.*]] = extractelement <2 x i32> [[TMP32]], i64 1 -; CHECK-NEXT: [[TMP35:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 0 -; CHECK-NEXT: [[TMP36:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 1 -; CHECK-NEXT: [[TMP37:%.*]] = extractelement 
<3 x i32> [[WORKGROUPID]], i64 2 -; CHECK-NEXT: [[TMP38:%.*]] = insertelement <20 x i32> poison, i32 [[GLOBALTABLE]], i64 0 -; CHECK-NEXT: [[TMP39:%.*]] = insertelement <20 x i32> [[TMP38]], i32 [[TMP33]], i64 1 -; CHECK-NEXT: [[TMP40:%.*]] = insertelement <20 x i32> [[TMP39]], i32 [[TMP34]], i64 2 -; CHECK-NEXT: [[TMP41:%.*]] = insertelement <20 x i32> [[TMP40]], i32 [[PAD0]], i64 3 -; CHECK-NEXT: [[TMP42:%.*]] = insertelement <20 x i32> [[TMP41]], i32 [[PAD1]], i64 4 -; CHECK-NEXT: [[TMP43:%.*]] = insertelement <20 x i32> [[TMP42]], i32 [[PAD2]], i64 5 -; CHECK-NEXT: [[TMP44:%.*]] = insertelement <20 x i32> [[TMP43]], i32 [[PAD3]], i64 6 -; CHECK-NEXT: [[TMP45:%.*]] = insertelement <20 x i32> [[TMP44]], i32 [[PAD4]], i64 7 -; CHECK-NEXT: [[TMP46:%.*]] = insertelement <20 x i32> [[TMP45]], i32 [[PAD5]], i64 8 -; CHECK-NEXT: [[TMP47:%.*]] = insertelement <20 x i32> [[TMP46]], i32 [[PAD6]], i64 9 -; CHECK-NEXT: [[TMP48:%.*]] = insertelement <20 x i32> [[TMP47]], i32 [[PAD7]], i64 10 -; CHECK-NEXT: [[TMP49:%.*]] = insertelement <20 x i32> [[TMP48]], i32 [[PAD8]], i64 11 -; CHECK-NEXT: [[TMP50:%.*]] = insertelement <20 x i32> [[TMP49]], i32 [[PAD9]], i64 12 -; CHECK-NEXT: [[TMP51:%.*]] = insertelement <20 x i32> [[TMP50]], i32 [[PAD10]], i64 13 -; CHECK-NEXT: [[TMP52:%.*]] = insertelement <20 x i32> [[TMP51]], i32 [[PAD11]], i64 14 -; CHECK-NEXT: [[TMP53:%.*]] = insertelement <20 x i32> [[TMP52]], i32 [[SPILLTABLE]], i64 15 -; CHECK-NEXT: [[TMP54:%.*]] = insertelement <20 x i32> [[TMP53]], i32 [[TMP35]], i64 16 -; CHECK-NEXT: [[TMP55:%.*]] = insertelement <20 x i32> [[TMP54]], i32 [[TMP36]], i64 17 -; CHECK-NEXT: [[TMP56:%.*]] = insertelement <20 x i32> [[TMP55]], i32 [[TMP37]], i64 18 -; CHECK-NEXT: [[TMP57:%.*]] = insertelement <20 x i32> [[TMP56]], i32 [[MULTIDISPATCHINFO]], i64 19 -; CHECK-NEXT: call void (ptr, i32, <20 x i32>, { <3 x i32>, i32, ptr addrspace(5) }, i32, ...) 
@llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_v3i32i32p5s(ptr inreg [[TMP30]], i32 inreg [[TMP26]], <20 x i32> inreg [[TMP57]], { <3 x i32>, i32, ptr addrspace(5) } [[TMP16]], i32 0) +; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5) } poison, <3 x i32> [[LOCALINVOCATIONID]], 0 +; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5) } [[TMP16]], i32 add (i32 ptrtoint (ptr @test.2 to i32), i32 1), 1 +; CHECK-NEXT: [[TMP18:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5) } [[TMP17]], ptr addrspace(5) [[TMP15]], 2 +; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { <3 x i32>, i32, ptr addrspace(5) } [[TMP18]], 1 +; CHECK-NEXT: [[TMP20:%.*]] = call i32 @llvm.amdgcn.set.inactive.chain.arg.i32(i32 [[TMP19]], i32 [[VCR]]) +; CHECK-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +; CHECK-NEXT: [[TMP22:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP21]]) +; CHECK-NEXT: [[TMP23:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP22]], i1 true) +; CHECK-NEXT: [[TMP24:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP20]], i32 [[TMP23]]) +; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP20]], [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP25]]) +; CHECK-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP24]]) +; CHECK-NEXT: [[TMP28:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP26]]) +; CHECK-NEXT: [[TMP29:%.*]] = and i32 [[TMP27]], -64 +; CHECK-NEXT: [[TMP30:%.*]] = insertelement <2 x i32> [[TMP8]], i32 [[TMP29]], i64 0 +; CHECK-NEXT: [[TMP31:%.*]] = bitcast <2 x i32> [[TMP30]] to i64 +; CHECK-NEXT: [[TMP32:%.*]] = inttoptr i64 [[TMP31]] to ptr +; CHECK-NEXT: [[TMP33:%.*]] = ptrtoint ptr addrspace(4) [[NUMWORKGROUPSPTR]] to i64 +; CHECK-NEXT: [[TMP34:%.*]] = bitcast i64 [[TMP33]] to <2 x i32> +; CHECK-NEXT: [[TMP35:%.*]] = extractelement <2 x i32> [[TMP34]], i64 0 +; CHECK-NEXT: [[TMP36:%.*]] = extractelement <2 x i32> [[TMP34]], i64 1 +; CHECK-NEXT: [[TMP37:%.*]] = extractelement <3 x i32> 
[[WORKGROUPID]], i64 0 +; CHECK-NEXT: [[TMP38:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 1 +; CHECK-NEXT: [[TMP39:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 2 +; CHECK-NEXT: [[TMP40:%.*]] = insertelement <20 x i32> poison, i32 [[GLOBALTABLE]], i64 0 +; CHECK-NEXT: [[TMP41:%.*]] = insertelement <20 x i32> [[TMP40]], i32 [[TMP35]], i64 1 +; CHECK-NEXT: [[TMP42:%.*]] = insertelement <20 x i32> [[TMP41]], i32 [[TMP36]], i64 2 +; CHECK-NEXT: [[TMP43:%.*]] = insertelement <20 x i32> [[TMP42]], i32 [[PAD0]], i64 3 +; CHECK-NEXT: [[TMP44:%.*]] = insertelement <20 x i32> [[TMP43]], i32 [[PAD1]], i64 4 +; CHECK-NEXT: [[TMP45:%.*]] = insertelement <20 x i32> [[TMP44]], i32 [[PAD2]], i64 5 +; CHECK-NEXT: [[TMP46:%.*]] = insertelement <20 x i32> [[TMP45]], i32 [[PAD3]], i64 6 +; CHECK-NEXT: [[TMP47:%.*]] = insertelement <20 x i32> [[TMP46]], i32 [[PAD4]], i64 7 +; CHECK-NEXT: [[TMP48:%.*]] = insertelement <20 x i32> [[TMP47]], i32 [[PAD5]], i64 8 +; CHECK-NEXT: [[TMP49:%.*]] = insertelement <20 x i32> [[TMP48]], i32 [[PAD6]], i64 9 +; CHECK-NEXT: [[TMP50:%.*]] = insertelement <20 x i32> [[TMP49]], i32 [[PAD7]], i64 10 +; CHECK-NEXT: [[TMP51:%.*]] = insertelement <20 x i32> [[TMP50]], i32 [[PAD8]], i64 11 +; CHECK-NEXT: [[TMP52:%.*]] = insertelement <20 x i32> [[TMP51]], i32 [[PAD9]], i64 12 +; CHECK-NEXT: [[TMP53:%.*]] = insertelement <20 x i32> [[TMP52]], i32 [[PAD10]], i64 13 +; CHECK-NEXT: [[TMP54:%.*]] = insertelement <20 x i32> [[TMP53]], i32 [[PAD11]], i64 14 +; CHECK-NEXT: [[TMP55:%.*]] = insertelement <20 x i32> [[TMP54]], i32 [[SPILLTABLE]], i64 15 +; CHECK-NEXT: [[TMP56:%.*]] = insertelement <20 x i32> [[TMP55]], i32 [[TMP37]], i64 16 +; CHECK-NEXT: [[TMP57:%.*]] = insertelement <20 x i32> [[TMP56]], i32 [[TMP38]], i64 17 +; CHECK-NEXT: [[TMP58:%.*]] = insertelement <20 x i32> [[TMP57]], i32 [[TMP39]], i64 18 +; CHECK-NEXT: [[TMP59:%.*]] = insertelement <20 x i32> [[TMP58]], i32 [[MULTIDISPATCHINFO]], i64 19 +; CHECK-NEXT: call void (ptr, i32, 
<20 x i32>, { <3 x i32>, i32, ptr addrspace(5) }, i32, ...) @llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_v3i32i32p5s(ptr inreg [[TMP32]], i32 inreg [[TMP28]], <20 x i32> inreg [[TMP59]], { <3 x i32>, i32, ptr addrspace(5) } [[TMP18]], i32 0) ; CHECK-NEXT: unreachable ; ; @@ -261,63 +266,65 @@ define void @test.nested.gep({} %unused) !lgc.cps !{i32 1} !lgc.shaderstage !{i3 ; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(5) [[TMP1]], align 4 ; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], -12 ; CHECK-NEXT: [[P2:%.*]] = extractvalue { i32 } [[CPS_STATE]], 0 -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr addrspace(5) null, i32 [[TMP12]] -; CHECK-NEXT: [[N333:%.*]] = load i32, ptr addrspace(5) [[TMP13]], align 4 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr addrspace(5) null, i32 [[P2]] -; CHECK-NEXT: [[N99:%.*]] = load i8, ptr addrspace(5) [[TMP14]], align 1 -; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(5) [[TMP1]], align 4 -; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[TMP15]], -12 -; CHECK-NEXT: store i32 [[TMP16]], ptr addrspace(5) [[TMP1]], align 4 +; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i32 [[TMP12]] to ptr addrspace(5) +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr addrspace(5) [[TMP13]], i32 0 +; CHECK-NEXT: [[N333:%.*]] = load i32, ptr addrspace(5) [[TMP14]], align 4 +; CHECK-NEXT: [[TMP15:%.*]] = inttoptr i32 [[P2]] to ptr addrspace(5) +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr addrspace(5) [[TMP15]], i32 0 +; CHECK-NEXT: [[N99:%.*]] = load i8, ptr addrspace(5) [[TMP16]], align 1 +; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(5) [[TMP1]], align 4 +; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[TMP17]], -12 +; CHECK-NEXT: store i32 [[TMP18]], ptr addrspace(5) [[TMP1]], align 4 ; CHECK-NEXT: br label [[TAIL_BLOCK:%.*]] ; CHECK: tail.block: -; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5) } poison, <3 x i32> [[LOCALINVOCATIONID]], 0 -; CHECK-NEXT: [[TMP18:%.*]] = insertvalue { <3 x i32>, i32, ptr 
addrspace(5) } [[TMP17]], i32 0, 1 -; CHECK-NEXT: [[TMP19:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5) } [[TMP18]], ptr addrspace(5) poison, 2 -; CHECK-NEXT: [[TMP20:%.*]] = extractvalue { <3 x i32>, i32, ptr addrspace(5) } [[TMP19]], 1 -; CHECK-NEXT: [[TMP21:%.*]] = call i32 @llvm.amdgcn.set.inactive.chain.arg.i32(i32 [[TMP20]], i32 [[VCR]]) -; CHECK-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -; CHECK-NEXT: [[TMP23:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP22]]) -; CHECK-NEXT: [[TMP24:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP23]], i1 true) -; CHECK-NEXT: [[TMP25:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP21]], i32 [[TMP24]]) -; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i32 [[TMP21]], [[TMP25]] -; CHECK-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP26]]) -; CHECK-NEXT: [[TMP28:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP25]]) -; CHECK-NEXT: [[TMP29:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP27]]) -; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i32 [[TMP28]], 0 -; CHECK-NEXT: br i1 [[TMP30]], label [[RET_BLOCK:%.*]], label [[CHAIN_BLOCK:%.*]] +; CHECK-NEXT: [[TMP19:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5) } poison, <3 x i32> [[LOCALINVOCATIONID]], 0 +; CHECK-NEXT: [[TMP20:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5) } [[TMP19]], i32 0, 1 +; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5) } [[TMP20]], ptr addrspace(5) poison, 2 +; CHECK-NEXT: [[TMP22:%.*]] = extractvalue { <3 x i32>, i32, ptr addrspace(5) } [[TMP21]], 1 +; CHECK-NEXT: [[TMP23:%.*]] = call i32 @llvm.amdgcn.set.inactive.chain.arg.i32(i32 [[TMP22]], i32 [[VCR]]) +; CHECK-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +; CHECK-NEXT: [[TMP25:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP24]]) +; CHECK-NEXT: [[TMP26:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP25]], i1 true) +; CHECK-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP23]], i32 [[TMP26]]) +; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i32 [[TMP23]], 
[[TMP27]] +; CHECK-NEXT: [[TMP29:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP28]]) +; CHECK-NEXT: [[TMP30:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP27]]) +; CHECK-NEXT: [[TMP31:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP29]]) +; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i32 [[TMP30]], 0 +; CHECK-NEXT: br i1 [[TMP32]], label [[RET_BLOCK:%.*]], label [[CHAIN_BLOCK:%.*]] ; CHECK: chain.block: -; CHECK-NEXT: [[TMP31:%.*]] = and i32 [[TMP28]], -64 -; CHECK-NEXT: [[TMP32:%.*]] = insertelement <2 x i32> [[TMP8]], i32 [[TMP31]], i64 0 -; CHECK-NEXT: [[TMP33:%.*]] = bitcast <2 x i32> [[TMP32]] to i64 -; CHECK-NEXT: [[TMP34:%.*]] = inttoptr i64 [[TMP33]] to ptr -; CHECK-NEXT: [[TMP35:%.*]] = ptrtoint ptr addrspace(4) [[NUMWORKGROUPSPTR]] to i64 -; CHECK-NEXT: [[TMP36:%.*]] = bitcast i64 [[TMP35]] to <2 x i32> -; CHECK-NEXT: [[TMP37:%.*]] = extractelement <2 x i32> [[TMP36]], i64 0 -; CHECK-NEXT: [[TMP38:%.*]] = extractelement <2 x i32> [[TMP36]], i64 1 -; CHECK-NEXT: [[TMP39:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 0 -; CHECK-NEXT: [[TMP40:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 1 -; CHECK-NEXT: [[TMP41:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 2 -; CHECK-NEXT: [[TMP42:%.*]] = insertelement <20 x i32> poison, i32 [[GLOBALTABLE]], i64 0 -; CHECK-NEXT: [[TMP43:%.*]] = insertelement <20 x i32> [[TMP42]], i32 [[TMP37]], i64 1 -; CHECK-NEXT: [[TMP44:%.*]] = insertelement <20 x i32> [[TMP43]], i32 [[TMP38]], i64 2 -; CHECK-NEXT: [[TMP45:%.*]] = insertelement <20 x i32> [[TMP44]], i32 [[PAD0]], i64 3 -; CHECK-NEXT: [[TMP46:%.*]] = insertelement <20 x i32> [[TMP45]], i32 [[PAD1]], i64 4 -; CHECK-NEXT: [[TMP47:%.*]] = insertelement <20 x i32> [[TMP46]], i32 [[PAD2]], i64 5 -; CHECK-NEXT: [[TMP48:%.*]] = insertelement <20 x i32> [[TMP47]], i32 [[PAD3]], i64 6 -; CHECK-NEXT: [[TMP49:%.*]] = insertelement <20 x i32> [[TMP48]], i32 [[PAD4]], i64 7 -; CHECK-NEXT: [[TMP50:%.*]] = insertelement <20 x i32> [[TMP49]], i32 [[PAD5]], i64 8 -; 
CHECK-NEXT: [[TMP51:%.*]] = insertelement <20 x i32> [[TMP50]], i32 [[PAD6]], i64 9 -; CHECK-NEXT: [[TMP52:%.*]] = insertelement <20 x i32> [[TMP51]], i32 [[PAD7]], i64 10 -; CHECK-NEXT: [[TMP53:%.*]] = insertelement <20 x i32> [[TMP52]], i32 [[PAD8]], i64 11 -; CHECK-NEXT: [[TMP54:%.*]] = insertelement <20 x i32> [[TMP53]], i32 [[PAD9]], i64 12 -; CHECK-NEXT: [[TMP55:%.*]] = insertelement <20 x i32> [[TMP54]], i32 [[PAD10]], i64 13 -; CHECK-NEXT: [[TMP56:%.*]] = insertelement <20 x i32> [[TMP55]], i32 [[PAD11]], i64 14 -; CHECK-NEXT: [[TMP57:%.*]] = insertelement <20 x i32> [[TMP56]], i32 [[SPILLTABLE]], i64 15 -; CHECK-NEXT: [[TMP58:%.*]] = insertelement <20 x i32> [[TMP57]], i32 [[TMP39]], i64 16 -; CHECK-NEXT: [[TMP59:%.*]] = insertelement <20 x i32> [[TMP58]], i32 [[TMP40]], i64 17 -; CHECK-NEXT: [[TMP60:%.*]] = insertelement <20 x i32> [[TMP59]], i32 [[TMP41]], i64 18 -; CHECK-NEXT: [[TMP61:%.*]] = insertelement <20 x i32> [[TMP60]], i32 [[MULTIDISPATCHINFO]], i64 19 -; CHECK-NEXT: call void (ptr, i32, <20 x i32>, { <3 x i32>, i32, ptr addrspace(5) }, i32, ...) 
@llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_v3i32i32p5s(ptr inreg [[TMP34]], i32 inreg [[TMP29]], <20 x i32> inreg [[TMP61]], { <3 x i32>, i32, ptr addrspace(5) } [[TMP19]], i32 0) +; CHECK-NEXT: [[TMP33:%.*]] = and i32 [[TMP30]], -64 +; CHECK-NEXT: [[TMP34:%.*]] = insertelement <2 x i32> [[TMP8]], i32 [[TMP33]], i64 0 +; CHECK-NEXT: [[TMP35:%.*]] = bitcast <2 x i32> [[TMP34]] to i64 +; CHECK-NEXT: [[TMP36:%.*]] = inttoptr i64 [[TMP35]] to ptr +; CHECK-NEXT: [[TMP37:%.*]] = ptrtoint ptr addrspace(4) [[NUMWORKGROUPSPTR]] to i64 +; CHECK-NEXT: [[TMP38:%.*]] = bitcast i64 [[TMP37]] to <2 x i32> +; CHECK-NEXT: [[TMP39:%.*]] = extractelement <2 x i32> [[TMP38]], i64 0 +; CHECK-NEXT: [[TMP40:%.*]] = extractelement <2 x i32> [[TMP38]], i64 1 +; CHECK-NEXT: [[TMP41:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 0 +; CHECK-NEXT: [[TMP42:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 1 +; CHECK-NEXT: [[TMP43:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 2 +; CHECK-NEXT: [[TMP44:%.*]] = insertelement <20 x i32> poison, i32 [[GLOBALTABLE]], i64 0 +; CHECK-NEXT: [[TMP45:%.*]] = insertelement <20 x i32> [[TMP44]], i32 [[TMP39]], i64 1 +; CHECK-NEXT: [[TMP46:%.*]] = insertelement <20 x i32> [[TMP45]], i32 [[TMP40]], i64 2 +; CHECK-NEXT: [[TMP47:%.*]] = insertelement <20 x i32> [[TMP46]], i32 [[PAD0]], i64 3 +; CHECK-NEXT: [[TMP48:%.*]] = insertelement <20 x i32> [[TMP47]], i32 [[PAD1]], i64 4 +; CHECK-NEXT: [[TMP49:%.*]] = insertelement <20 x i32> [[TMP48]], i32 [[PAD2]], i64 5 +; CHECK-NEXT: [[TMP50:%.*]] = insertelement <20 x i32> [[TMP49]], i32 [[PAD3]], i64 6 +; CHECK-NEXT: [[TMP51:%.*]] = insertelement <20 x i32> [[TMP50]], i32 [[PAD4]], i64 7 +; CHECK-NEXT: [[TMP52:%.*]] = insertelement <20 x i32> [[TMP51]], i32 [[PAD5]], i64 8 +; CHECK-NEXT: [[TMP53:%.*]] = insertelement <20 x i32> [[TMP52]], i32 [[PAD6]], i64 9 +; CHECK-NEXT: [[TMP54:%.*]] = insertelement <20 x i32> [[TMP53]], i32 [[PAD7]], i64 10 +; CHECK-NEXT: [[TMP55:%.*]] = insertelement <20 x i32> 
[[TMP54]], i32 [[PAD8]], i64 11 +; CHECK-NEXT: [[TMP56:%.*]] = insertelement <20 x i32> [[TMP55]], i32 [[PAD9]], i64 12 +; CHECK-NEXT: [[TMP57:%.*]] = insertelement <20 x i32> [[TMP56]], i32 [[PAD10]], i64 13 +; CHECK-NEXT: [[TMP58:%.*]] = insertelement <20 x i32> [[TMP57]], i32 [[PAD11]], i64 14 +; CHECK-NEXT: [[TMP59:%.*]] = insertelement <20 x i32> [[TMP58]], i32 [[SPILLTABLE]], i64 15 +; CHECK-NEXT: [[TMP60:%.*]] = insertelement <20 x i32> [[TMP59]], i32 [[TMP41]], i64 16 +; CHECK-NEXT: [[TMP61:%.*]] = insertelement <20 x i32> [[TMP60]], i32 [[TMP42]], i64 17 +; CHECK-NEXT: [[TMP62:%.*]] = insertelement <20 x i32> [[TMP61]], i32 [[TMP43]], i64 18 +; CHECK-NEXT: [[TMP63:%.*]] = insertelement <20 x i32> [[TMP62]], i32 [[MULTIDISPATCHINFO]], i64 19 +; CHECK-NEXT: call void (ptr, i32, <20 x i32>, { <3 x i32>, i32, ptr addrspace(5) }, i32, ...) @llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_v3i32i32p5s(ptr inreg [[TMP36]], i32 inreg [[TMP31]], <20 x i32> inreg [[TMP63]], { <3 x i32>, i32, ptr addrspace(5) } [[TMP21]], i32 0) ; CHECK-NEXT: unreachable ; CHECK: ret.block: ; CHECK-NEXT: ret void @@ -342,75 +349,78 @@ define void @test.nested.gep({} %unused) !lgc.cps !{i32 1} !lgc.shaderstage !{i3 ; CHECK-NEXT: [[TMP12:%.*]] = mul i32 [[STACK_EL0]], 12 ; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[TMP10]], [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(5) [[TMP1]], align 4 -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr addrspace(5) null, i32 [[TMP13]] -; CHECK-NEXT: store i32 [[TMP14]], ptr addrspace(5) [[TMP15]], align 4 +; CHECK-NEXT: [[TMP15:%.*]] = inttoptr i32 [[TMP13]] to ptr addrspace(5) +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr addrspace(5) [[TMP15]], i32 0 +; CHECK-NEXT: store i32 [[TMP14]], ptr addrspace(5) [[TMP16]], align 4 ; CHECK-NEXT: [[STACK_EL1:%.*]] = call i32 @lgc.cps.get.dummy.index(i32 1) -; CHECK-NEXT: [[TMP16:%.*]] = mul i32 [[STACK_EL1]], 12 -; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[TMP10]], [[TMP16]] -; CHECK-NEXT: 
[[TMP18:%.*]] = load i32, ptr addrspace(5) [[TMP1]], align 4 -; CHECK-NEXT: [[TMP19:%.*]] = add i32 [[TMP18]], -4 -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr addrspace(5) null, i32 [[TMP17]] -; CHECK-NEXT: store i32 [[TMP19]], ptr addrspace(5) [[TMP20]], align 4 +; CHECK-NEXT: [[TMP17:%.*]] = mul i32 [[STACK_EL1]], 12 +; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[TMP10]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(5) [[TMP1]], align 4 +; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TMP19]], -4 +; CHECK-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP18]] to ptr addrspace(5) +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr addrspace(5) [[TMP21]], i32 0 +; CHECK-NEXT: store i32 [[TMP20]], ptr addrspace(5) [[TMP22]], align 4 ; CHECK-NEXT: [[STACK_EL2:%.*]] = call i32 @lgc.cps.get.dummy.index(i32 2) ; CHECK-NEXT: [[STACK_EL2_DIV:%.*]] = sdiv i32 [[STACK_EL2]], 2 -; CHECK-NEXT: [[TMP21:%.*]] = add i32 [[TMP10]], 4 -; CHECK-NEXT: [[TMP22:%.*]] = mul i32 [[STACK_EL2_DIV]], 12 -; CHECK-NEXT: [[TMP23:%.*]] = add i32 [[TMP21]], [[TMP22]] -; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(5) [[TMP1]], align 4 -; CHECK-NEXT: [[TMP25:%.*]] = add i32 [[TMP24]], -8 -; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr addrspace(5) null, i32 [[TMP23]] -; CHECK-NEXT: store i32 [[TMP25]], ptr addrspace(5) [[TMP26]], align 4 -; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(5) [[TMP1]], align 4 -; CHECK-NEXT: [[TMP28:%.*]] = inttoptr i32 [[TMP27]] to ptr addrspace(5) +; CHECK-NEXT: [[TMP23:%.*]] = add i32 [[TMP10]], 4 +; CHECK-NEXT: [[TMP24:%.*]] = mul i32 [[STACK_EL2_DIV]], 12 +; CHECK-NEXT: [[TMP25:%.*]] = add i32 [[TMP23]], [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(5) [[TMP1]], align 4 +; CHECK-NEXT: [[TMP27:%.*]] = add i32 [[TMP26]], -8 +; CHECK-NEXT: [[TMP28:%.*]] = inttoptr i32 [[TMP25]] to ptr addrspace(5) +; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr addrspace(5) [[TMP28]], i32 0 +; CHECK-NEXT: store i32 [[TMP27]], ptr 
addrspace(5) [[TMP29]], align 4 +; CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr addrspace(5) [[TMP1]], align 4 +; CHECK-NEXT: [[TMP31:%.*]] = inttoptr i32 [[TMP30]] to ptr addrspace(5) ; CHECK-NEXT: br label [[TAIL_BLOCK:%.*]] ; CHECK: tail.block: -; CHECK-NEXT: [[TMP29:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5), i32, i32 } poison, <3 x i32> [[LOCALINVOCATIONID]], 0 -; CHECK-NEXT: [[TMP30:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5), i32, i32 } [[TMP29]], i32 add (i32 ptrtoint (ptr @test.1 to i32), i32 1), 1 -; CHECK-NEXT: [[TMP31:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5), i32, i32 } [[TMP30]], ptr addrspace(5) [[TMP28]], 2 -; CHECK-NEXT: [[TMP32:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5), i32, i32 } [[TMP31]], i32 [[TMP25]], 3 -; CHECK-NEXT: [[TMP33:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5), i32, i32 } [[TMP32]], i32 [[TMP25]], 4 -; CHECK-NEXT: [[TMP34:%.*]] = extractvalue { <3 x i32>, i32, ptr addrspace(5), i32, i32 } [[TMP33]], 1 -; CHECK-NEXT: [[TMP35:%.*]] = call i32 @llvm.amdgcn.set.inactive.chain.arg.i32(i32 [[TMP34]], i32 [[VCR]]) -; CHECK-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -; CHECK-NEXT: [[TMP37:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP36]]) -; CHECK-NEXT: [[TMP38:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP37]], i1 true) -; CHECK-NEXT: [[TMP39:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP35]], i32 [[TMP38]]) -; CHECK-NEXT: [[TMP40:%.*]] = icmp eq i32 [[TMP35]], [[TMP39]] -; CHECK-NEXT: [[TMP41:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP40]]) -; CHECK-NEXT: [[TMP42:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP39]]) -; CHECK-NEXT: [[TMP43:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP41]]) -; CHECK-NEXT: [[TMP44:%.*]] = and i32 [[TMP42]], -64 -; CHECK-NEXT: [[TMP45:%.*]] = insertelement <2 x i32> [[TMP8]], i32 [[TMP44]], i64 0 -; CHECK-NEXT: [[TMP46:%.*]] = bitcast <2 x i32> [[TMP45]] to i64 -; CHECK-NEXT: [[TMP47:%.*]] = inttoptr i64 [[TMP46]] to ptr -; CHECK-NEXT: 
[[TMP48:%.*]] = ptrtoint ptr addrspace(4) [[NUMWORKGROUPSPTR]] to i64 -; CHECK-NEXT: [[TMP49:%.*]] = bitcast i64 [[TMP48]] to <2 x i32> -; CHECK-NEXT: [[TMP50:%.*]] = extractelement <2 x i32> [[TMP49]], i64 0 -; CHECK-NEXT: [[TMP51:%.*]] = extractelement <2 x i32> [[TMP49]], i64 1 -; CHECK-NEXT: [[TMP52:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 0 -; CHECK-NEXT: [[TMP53:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 1 -; CHECK-NEXT: [[TMP54:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 2 -; CHECK-NEXT: [[TMP55:%.*]] = insertelement <20 x i32> poison, i32 [[GLOBALTABLE]], i64 0 -; CHECK-NEXT: [[TMP56:%.*]] = insertelement <20 x i32> [[TMP55]], i32 [[TMP50]], i64 1 -; CHECK-NEXT: [[TMP57:%.*]] = insertelement <20 x i32> [[TMP56]], i32 [[TMP51]], i64 2 -; CHECK-NEXT: [[TMP58:%.*]] = insertelement <20 x i32> [[TMP57]], i32 [[PAD0]], i64 3 -; CHECK-NEXT: [[TMP59:%.*]] = insertelement <20 x i32> [[TMP58]], i32 [[PAD1]], i64 4 -; CHECK-NEXT: [[TMP60:%.*]] = insertelement <20 x i32> [[TMP59]], i32 [[PAD2]], i64 5 -; CHECK-NEXT: [[TMP61:%.*]] = insertelement <20 x i32> [[TMP60]], i32 [[PAD3]], i64 6 -; CHECK-NEXT: [[TMP62:%.*]] = insertelement <20 x i32> [[TMP61]], i32 [[PAD4]], i64 7 -; CHECK-NEXT: [[TMP63:%.*]] = insertelement <20 x i32> [[TMP62]], i32 [[PAD5]], i64 8 -; CHECK-NEXT: [[TMP64:%.*]] = insertelement <20 x i32> [[TMP63]], i32 [[PAD6]], i64 9 -; CHECK-NEXT: [[TMP65:%.*]] = insertelement <20 x i32> [[TMP64]], i32 [[PAD7]], i64 10 -; CHECK-NEXT: [[TMP66:%.*]] = insertelement <20 x i32> [[TMP65]], i32 [[PAD8]], i64 11 -; CHECK-NEXT: [[TMP67:%.*]] = insertelement <20 x i32> [[TMP66]], i32 [[PAD9]], i64 12 -; CHECK-NEXT: [[TMP68:%.*]] = insertelement <20 x i32> [[TMP67]], i32 [[PAD10]], i64 13 -; CHECK-NEXT: [[TMP69:%.*]] = insertelement <20 x i32> [[TMP68]], i32 [[PAD11]], i64 14 -; CHECK-NEXT: [[TMP70:%.*]] = insertelement <20 x i32> [[TMP69]], i32 [[SPILLTABLE]], i64 15 -; CHECK-NEXT: [[TMP71:%.*]] = insertelement <20 x i32> [[TMP70]], 
i32 [[TMP52]], i64 16 -; CHECK-NEXT: [[TMP72:%.*]] = insertelement <20 x i32> [[TMP71]], i32 [[TMP53]], i64 17 -; CHECK-NEXT: [[TMP73:%.*]] = insertelement <20 x i32> [[TMP72]], i32 [[TMP54]], i64 18 -; CHECK-NEXT: [[TMP74:%.*]] = insertelement <20 x i32> [[TMP73]], i32 [[MULTIDISPATCHINFO]], i64 19 -; CHECK-NEXT: call void (ptr, i32, <20 x i32>, { <3 x i32>, i32, ptr addrspace(5), i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_v3i32i32p5i32i32s(ptr inreg [[TMP47]], i32 inreg [[TMP43]], <20 x i32> inreg [[TMP74]], { <3 x i32>, i32, ptr addrspace(5), i32, i32 } [[TMP33]], i32 0) +; CHECK-NEXT: [[TMP32:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5), i32, i32 } poison, <3 x i32> [[LOCALINVOCATIONID]], 0 +; CHECK-NEXT: [[TMP33:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5), i32, i32 } [[TMP32]], i32 add (i32 ptrtoint (ptr @test.1 to i32), i32 1), 1 +; CHECK-NEXT: [[TMP34:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5), i32, i32 } [[TMP33]], ptr addrspace(5) [[TMP31]], 2 +; CHECK-NEXT: [[TMP35:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5), i32, i32 } [[TMP34]], i32 [[TMP27]], 3 +; CHECK-NEXT: [[TMP36:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5), i32, i32 } [[TMP35]], i32 [[TMP27]], 4 +; CHECK-NEXT: [[TMP37:%.*]] = extractvalue { <3 x i32>, i32, ptr addrspace(5), i32, i32 } [[TMP36]], 1 +; CHECK-NEXT: [[TMP38:%.*]] = call i32 @llvm.amdgcn.set.inactive.chain.arg.i32(i32 [[TMP37]], i32 [[VCR]]) +; CHECK-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +; CHECK-NEXT: [[TMP40:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP39]]) +; CHECK-NEXT: [[TMP41:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP40]], i1 true) +; CHECK-NEXT: [[TMP42:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP38]], i32 [[TMP41]]) +; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i32 [[TMP38]], [[TMP42]] +; CHECK-NEXT: [[TMP44:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP43]]) +; CHECK-NEXT: [[TMP45:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP42]]) +; 
CHECK-NEXT: [[TMP46:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP44]]) +; CHECK-NEXT: [[TMP47:%.*]] = and i32 [[TMP45]], -64 +; CHECK-NEXT: [[TMP48:%.*]] = insertelement <2 x i32> [[TMP8]], i32 [[TMP47]], i64 0 +; CHECK-NEXT: [[TMP49:%.*]] = bitcast <2 x i32> [[TMP48]] to i64 +; CHECK-NEXT: [[TMP50:%.*]] = inttoptr i64 [[TMP49]] to ptr +; CHECK-NEXT: [[TMP51:%.*]] = ptrtoint ptr addrspace(4) [[NUMWORKGROUPSPTR]] to i64 +; CHECK-NEXT: [[TMP52:%.*]] = bitcast i64 [[TMP51]] to <2 x i32> +; CHECK-NEXT: [[TMP53:%.*]] = extractelement <2 x i32> [[TMP52]], i64 0 +; CHECK-NEXT: [[TMP54:%.*]] = extractelement <2 x i32> [[TMP52]], i64 1 +; CHECK-NEXT: [[TMP55:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 0 +; CHECK-NEXT: [[TMP56:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 1 +; CHECK-NEXT: [[TMP57:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 2 +; CHECK-NEXT: [[TMP58:%.*]] = insertelement <20 x i32> poison, i32 [[GLOBALTABLE]], i64 0 +; CHECK-NEXT: [[TMP59:%.*]] = insertelement <20 x i32> [[TMP58]], i32 [[TMP53]], i64 1 +; CHECK-NEXT: [[TMP60:%.*]] = insertelement <20 x i32> [[TMP59]], i32 [[TMP54]], i64 2 +; CHECK-NEXT: [[TMP61:%.*]] = insertelement <20 x i32> [[TMP60]], i32 [[PAD0]], i64 3 +; CHECK-NEXT: [[TMP62:%.*]] = insertelement <20 x i32> [[TMP61]], i32 [[PAD1]], i64 4 +; CHECK-NEXT: [[TMP63:%.*]] = insertelement <20 x i32> [[TMP62]], i32 [[PAD2]], i64 5 +; CHECK-NEXT: [[TMP64:%.*]] = insertelement <20 x i32> [[TMP63]], i32 [[PAD3]], i64 6 +; CHECK-NEXT: [[TMP65:%.*]] = insertelement <20 x i32> [[TMP64]], i32 [[PAD4]], i64 7 +; CHECK-NEXT: [[TMP66:%.*]] = insertelement <20 x i32> [[TMP65]], i32 [[PAD5]], i64 8 +; CHECK-NEXT: [[TMP67:%.*]] = insertelement <20 x i32> [[TMP66]], i32 [[PAD6]], i64 9 +; CHECK-NEXT: [[TMP68:%.*]] = insertelement <20 x i32> [[TMP67]], i32 [[PAD7]], i64 10 +; CHECK-NEXT: [[TMP69:%.*]] = insertelement <20 x i32> [[TMP68]], i32 [[PAD8]], i64 11 +; CHECK-NEXT: [[TMP70:%.*]] = insertelement <20 x i32> [[TMP69]], i32 
[[PAD9]], i64 12 +; CHECK-NEXT: [[TMP71:%.*]] = insertelement <20 x i32> [[TMP70]], i32 [[PAD10]], i64 13 +; CHECK-NEXT: [[TMP72:%.*]] = insertelement <20 x i32> [[TMP71]], i32 [[PAD11]], i64 14 +; CHECK-NEXT: [[TMP73:%.*]] = insertelement <20 x i32> [[TMP72]], i32 [[SPILLTABLE]], i64 15 +; CHECK-NEXT: [[TMP74:%.*]] = insertelement <20 x i32> [[TMP73]], i32 [[TMP55]], i64 16 +; CHECK-NEXT: [[TMP75:%.*]] = insertelement <20 x i32> [[TMP74]], i32 [[TMP56]], i64 17 +; CHECK-NEXT: [[TMP76:%.*]] = insertelement <20 x i32> [[TMP75]], i32 [[TMP57]], i64 18 +; CHECK-NEXT: [[TMP77:%.*]] = insertelement <20 x i32> [[TMP76]], i32 [[MULTIDISPATCHINFO]], i64 19 +; CHECK-NEXT: call void (ptr, i32, <20 x i32>, { <3 x i32>, i32, ptr addrspace(5), i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_v3i32i32p5i32i32s(ptr inreg [[TMP50]], i32 inreg [[TMP46]], <20 x i32> inreg [[TMP77]], { <3 x i32>, i32, ptr addrspace(5), i32, i32 } [[TMP36]], i32 0) ; CHECK-NEXT: unreachable ; ; @@ -434,59 +444,60 @@ define void @test.nested.gep({} %unused) !lgc.cps !{i32 1} !lgc.shaderstage !{i3 ; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[TMP10]], [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[TMP13]], 8 ; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(5) [[TMP1]], align 4 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr addrspace(5) null, i32 [[TMP14]] -; CHECK-NEXT: store i32 [[TMP15]], ptr addrspace(5) [[TMP16]], align 4 -; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(5) [[TMP1]], align 4 -; CHECK-NEXT: [[TMP18:%.*]] = inttoptr i32 [[TMP17]] to ptr addrspace(5) +; CHECK-NEXT: [[TMP16:%.*]] = inttoptr i32 [[TMP14]] to ptr addrspace(5) +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr addrspace(5) [[TMP16]], i32 0 +; CHECK-NEXT: store i32 [[TMP15]], ptr addrspace(5) [[TMP17]], align 4 +; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(5) [[TMP1]], align 4 +; CHECK-NEXT: [[TMP19:%.*]] = inttoptr i32 [[TMP18]] to ptr addrspace(5) ; CHECK-NEXT: br label 
[[TAIL_BLOCK:%.*]] ; CHECK: tail.block: -; CHECK-NEXT: [[TMP19:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5), i32, i32 } poison, <3 x i32> [[LOCALINVOCATIONID]], 0 -; CHECK-NEXT: [[TMP20:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5), i32, i32 } [[TMP19]], i32 add (i32 ptrtoint (ptr @test.1 to i32), i32 1), 1 -; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5), i32, i32 } [[TMP20]], ptr addrspace(5) [[TMP18]], 2 -; CHECK-NEXT: [[TMP22:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5), i32, i32 } [[TMP21]], i32 [[TMP15]], 3 -; CHECK-NEXT: [[TMP23:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5), i32, i32 } [[TMP22]], i32 [[TMP15]], 4 -; CHECK-NEXT: [[TMP24:%.*]] = extractvalue { <3 x i32>, i32, ptr addrspace(5), i32, i32 } [[TMP23]], 1 -; CHECK-NEXT: [[TMP25:%.*]] = call i32 @llvm.amdgcn.set.inactive.chain.arg.i32(i32 [[TMP24]], i32 [[VCR]]) -; CHECK-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -; CHECK-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP26]]) -; CHECK-NEXT: [[TMP28:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP27]], i1 true) -; CHECK-NEXT: [[TMP29:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP25]], i32 [[TMP28]]) -; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i32 [[TMP25]], [[TMP29]] -; CHECK-NEXT: [[TMP31:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP30]]) -; CHECK-NEXT: [[TMP32:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP29]]) -; CHECK-NEXT: [[TMP33:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP31]]) -; CHECK-NEXT: [[TMP34:%.*]] = and i32 [[TMP32]], -64 -; CHECK-NEXT: [[TMP35:%.*]] = insertelement <2 x i32> [[TMP8]], i32 [[TMP34]], i64 0 -; CHECK-NEXT: [[TMP36:%.*]] = bitcast <2 x i32> [[TMP35]] to i64 -; CHECK-NEXT: [[TMP37:%.*]] = inttoptr i64 [[TMP36]] to ptr -; CHECK-NEXT: [[TMP38:%.*]] = ptrtoint ptr addrspace(4) [[NUMWORKGROUPSPTR]] to i64 -; CHECK-NEXT: [[TMP39:%.*]] = bitcast i64 [[TMP38]] to <2 x i32> -; CHECK-NEXT: [[TMP40:%.*]] = extractelement <2 x i32> [[TMP39]], i64 0 -; 
CHECK-NEXT: [[TMP41:%.*]] = extractelement <2 x i32> [[TMP39]], i64 1 -; CHECK-NEXT: [[TMP42:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 0 -; CHECK-NEXT: [[TMP43:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 1 -; CHECK-NEXT: [[TMP44:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 2 -; CHECK-NEXT: [[TMP45:%.*]] = insertelement <20 x i32> poison, i32 [[GLOBALTABLE]], i64 0 -; CHECK-NEXT: [[TMP46:%.*]] = insertelement <20 x i32> [[TMP45]], i32 [[TMP40]], i64 1 -; CHECK-NEXT: [[TMP47:%.*]] = insertelement <20 x i32> [[TMP46]], i32 [[TMP41]], i64 2 -; CHECK-NEXT: [[TMP48:%.*]] = insertelement <20 x i32> [[TMP47]], i32 [[PAD0]], i64 3 -; CHECK-NEXT: [[TMP49:%.*]] = insertelement <20 x i32> [[TMP48]], i32 [[PAD1]], i64 4 -; CHECK-NEXT: [[TMP50:%.*]] = insertelement <20 x i32> [[TMP49]], i32 [[PAD2]], i64 5 -; CHECK-NEXT: [[TMP51:%.*]] = insertelement <20 x i32> [[TMP50]], i32 [[PAD3]], i64 6 -; CHECK-NEXT: [[TMP52:%.*]] = insertelement <20 x i32> [[TMP51]], i32 [[PAD4]], i64 7 -; CHECK-NEXT: [[TMP53:%.*]] = insertelement <20 x i32> [[TMP52]], i32 [[PAD5]], i64 8 -; CHECK-NEXT: [[TMP54:%.*]] = insertelement <20 x i32> [[TMP53]], i32 [[PAD6]], i64 9 -; CHECK-NEXT: [[TMP55:%.*]] = insertelement <20 x i32> [[TMP54]], i32 [[PAD7]], i64 10 -; CHECK-NEXT: [[TMP56:%.*]] = insertelement <20 x i32> [[TMP55]], i32 [[PAD8]], i64 11 -; CHECK-NEXT: [[TMP57:%.*]] = insertelement <20 x i32> [[TMP56]], i32 [[PAD9]], i64 12 -; CHECK-NEXT: [[TMP58:%.*]] = insertelement <20 x i32> [[TMP57]], i32 [[PAD10]], i64 13 -; CHECK-NEXT: [[TMP59:%.*]] = insertelement <20 x i32> [[TMP58]], i32 [[PAD11]], i64 14 -; CHECK-NEXT: [[TMP60:%.*]] = insertelement <20 x i32> [[TMP59]], i32 [[SPILLTABLE]], i64 15 -; CHECK-NEXT: [[TMP61:%.*]] = insertelement <20 x i32> [[TMP60]], i32 [[TMP42]], i64 16 -; CHECK-NEXT: [[TMP62:%.*]] = insertelement <20 x i32> [[TMP61]], i32 [[TMP43]], i64 17 -; CHECK-NEXT: [[TMP63:%.*]] = insertelement <20 x i32> [[TMP62]], i32 [[TMP44]], i64 18 -; 
CHECK-NEXT: [[TMP64:%.*]] = insertelement <20 x i32> [[TMP63]], i32 [[MULTIDISPATCHINFO]], i64 19 -; CHECK-NEXT: call void (ptr, i32, <20 x i32>, { <3 x i32>, i32, ptr addrspace(5), i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_v3i32i32p5i32i32s(ptr inreg [[TMP37]], i32 inreg [[TMP33]], <20 x i32> inreg [[TMP64]], { <3 x i32>, i32, ptr addrspace(5), i32, i32 } [[TMP23]], i32 0) +; CHECK-NEXT: [[TMP20:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5), i32, i32 } poison, <3 x i32> [[LOCALINVOCATIONID]], 0 +; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5), i32, i32 } [[TMP20]], i32 add (i32 ptrtoint (ptr @test.1 to i32), i32 1), 1 +; CHECK-NEXT: [[TMP22:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5), i32, i32 } [[TMP21]], ptr addrspace(5) [[TMP19]], 2 +; CHECK-NEXT: [[TMP23:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5), i32, i32 } [[TMP22]], i32 [[TMP15]], 3 +; CHECK-NEXT: [[TMP24:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5), i32, i32 } [[TMP23]], i32 [[TMP15]], 4 +; CHECK-NEXT: [[TMP25:%.*]] = extractvalue { <3 x i32>, i32, ptr addrspace(5), i32, i32 } [[TMP24]], 1 +; CHECK-NEXT: [[TMP26:%.*]] = call i32 @llvm.amdgcn.set.inactive.chain.arg.i32(i32 [[TMP25]], i32 [[VCR]]) +; CHECK-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +; CHECK-NEXT: [[TMP28:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP27]]) +; CHECK-NEXT: [[TMP29:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP28]], i1 true) +; CHECK-NEXT: [[TMP30:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP26]], i32 [[TMP29]]) +; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i32 [[TMP26]], [[TMP30]] +; CHECK-NEXT: [[TMP32:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP31]]) +; CHECK-NEXT: [[TMP33:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP30]]) +; CHECK-NEXT: [[TMP34:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP32]]) +; CHECK-NEXT: [[TMP35:%.*]] = and i32 [[TMP33]], -64 +; CHECK-NEXT: [[TMP36:%.*]] = insertelement <2 x i32> [[TMP8]], i32 [[TMP35]], i64 
0 +; CHECK-NEXT: [[TMP37:%.*]] = bitcast <2 x i32> [[TMP36]] to i64 +; CHECK-NEXT: [[TMP38:%.*]] = inttoptr i64 [[TMP37]] to ptr +; CHECK-NEXT: [[TMP39:%.*]] = ptrtoint ptr addrspace(4) [[NUMWORKGROUPSPTR]] to i64 +; CHECK-NEXT: [[TMP40:%.*]] = bitcast i64 [[TMP39]] to <2 x i32> +; CHECK-NEXT: [[TMP41:%.*]] = extractelement <2 x i32> [[TMP40]], i64 0 +; CHECK-NEXT: [[TMP42:%.*]] = extractelement <2 x i32> [[TMP40]], i64 1 +; CHECK-NEXT: [[TMP43:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 0 +; CHECK-NEXT: [[TMP44:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 1 +; CHECK-NEXT: [[TMP45:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 2 +; CHECK-NEXT: [[TMP46:%.*]] = insertelement <20 x i32> poison, i32 [[GLOBALTABLE]], i64 0 +; CHECK-NEXT: [[TMP47:%.*]] = insertelement <20 x i32> [[TMP46]], i32 [[TMP41]], i64 1 +; CHECK-NEXT: [[TMP48:%.*]] = insertelement <20 x i32> [[TMP47]], i32 [[TMP42]], i64 2 +; CHECK-NEXT: [[TMP49:%.*]] = insertelement <20 x i32> [[TMP48]], i32 [[PAD0]], i64 3 +; CHECK-NEXT: [[TMP50:%.*]] = insertelement <20 x i32> [[TMP49]], i32 [[PAD1]], i64 4 +; CHECK-NEXT: [[TMP51:%.*]] = insertelement <20 x i32> [[TMP50]], i32 [[PAD2]], i64 5 +; CHECK-NEXT: [[TMP52:%.*]] = insertelement <20 x i32> [[TMP51]], i32 [[PAD3]], i64 6 +; CHECK-NEXT: [[TMP53:%.*]] = insertelement <20 x i32> [[TMP52]], i32 [[PAD4]], i64 7 +; CHECK-NEXT: [[TMP54:%.*]] = insertelement <20 x i32> [[TMP53]], i32 [[PAD5]], i64 8 +; CHECK-NEXT: [[TMP55:%.*]] = insertelement <20 x i32> [[TMP54]], i32 [[PAD6]], i64 9 +; CHECK-NEXT: [[TMP56:%.*]] = insertelement <20 x i32> [[TMP55]], i32 [[PAD7]], i64 10 +; CHECK-NEXT: [[TMP57:%.*]] = insertelement <20 x i32> [[TMP56]], i32 [[PAD8]], i64 11 +; CHECK-NEXT: [[TMP58:%.*]] = insertelement <20 x i32> [[TMP57]], i32 [[PAD9]], i64 12 +; CHECK-NEXT: [[TMP59:%.*]] = insertelement <20 x i32> [[TMP58]], i32 [[PAD10]], i64 13 +; CHECK-NEXT: [[TMP60:%.*]] = insertelement <20 x i32> [[TMP59]], i32 [[PAD11]], i64 14 +; CHECK-NEXT: 
[[TMP61:%.*]] = insertelement <20 x i32> [[TMP60]], i32 [[SPILLTABLE]], i64 15 +; CHECK-NEXT: [[TMP62:%.*]] = insertelement <20 x i32> [[TMP61]], i32 [[TMP43]], i64 16 +; CHECK-NEXT: [[TMP63:%.*]] = insertelement <20 x i32> [[TMP62]], i32 [[TMP44]], i64 17 +; CHECK-NEXT: [[TMP64:%.*]] = insertelement <20 x i32> [[TMP63]], i32 [[TMP45]], i64 18 +; CHECK-NEXT: [[TMP65:%.*]] = insertelement <20 x i32> [[TMP64]], i32 [[MULTIDISPATCHINFO]], i64 19 +; CHECK-NEXT: call void (ptr, i32, <20 x i32>, { <3 x i32>, i32, ptr addrspace(5), i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_v3i32i32p5i32i32s(ptr inreg [[TMP38]], i32 inreg [[TMP34]], <20 x i32> inreg [[TMP65]], { <3 x i32>, i32, ptr addrspace(5), i32, i32 } [[TMP24]], i32 0) ; CHECK-NEXT: unreachable ; ;. diff --git a/lgc/test/Transforms/PatchBufferOp/simple.lgc b/lgc/test/Transforms/PatchBufferOp/simple.lgc index e172c4a40c..4f12d40c60 100644 --- a/lgc/test/Transforms/PatchBufferOp/simple.lgc +++ b/lgc/test/Transforms/PatchBufferOp/simple.lgc @@ -31,19 +31,14 @@ define amdgpu_gfx float @divergent_select(<4 x i32> inreg %desc0, <4 x i32> inre ; CHECK-NEXT: [[PTR_0:%.*]] = select i1 [[SEL:%.*]], <4 x i32> [[DESC0:%.*]], <4 x i32> [[DESC1:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[PTR_0]], i64 2 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 0, [[TMP1]] -; CHECK-NEXT: br i1 true, label [[TMP3:%.*]], label [[TMP11:%.*]] -; CHECK: 3: -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[PTR_0]], <4 x i32> poison, <2 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = and <2 x i32> [[TMP4]], -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to i64 -; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr addrspace(1) -; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP2]], i32 0, i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP7]], i32 [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = load float, ptr addrspace(1) [[TMP9]], align 4 -; CHECK-NEXT: br label [[TMP11]] -; CHECK: 11: -; 
CHECK-NEXT: [[NEWVALUE:%.*]] = phi float [ 0.000000e+00, [[TMP0:%.*]] ], [ [[TMP10]], [[TMP3]] ] -; CHECK-NEXT: ret float [[NEWVALUE]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[PTR_0]], <4 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = and <2 x i32> [[TMP3]], +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr addrspace(1) +; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP2]], i32 0, i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP6]], i32 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = load float, ptr addrspace(1) [[TMP8]], align 4 +; CHECK-NEXT: ret float [[TMP9]] ; %ptr0 = call ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32> %desc0) %ptr1 = call ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32> %desc1) @@ -57,19 +52,14 @@ define amdgpu_gfx float @divergent_select1(<4 x i32> %desc0, <4 x i32> inreg %de ; CHECK-NEXT: [[PTR_0:%.*]] = select i1 [[SEL:%.*]], <4 x i32> [[DESC0:%.*]], <4 x i32> [[DESC1:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[PTR_0]], i64 2 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 0, [[TMP1]] -; CHECK-NEXT: br i1 true, label [[TMP3:%.*]], label [[TMP11:%.*]] -; CHECK: 3: -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[PTR_0]], <4 x i32> poison, <2 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = and <2 x i32> [[TMP4]], -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to i64 -; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr addrspace(1) -; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP2]], i32 0, i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP7]], i32 [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = load float, ptr addrspace(1) [[TMP9]], align 4 -; CHECK-NEXT: br label [[TMP11]] -; CHECK: 11: -; CHECK-NEXT: [[NEWVALUE:%.*]] = phi float [ 0.000000e+00, [[TMP0:%.*]] ], [ [[TMP10]], [[TMP3]] ] -; CHECK-NEXT: ret float [[NEWVALUE]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[PTR_0]], <4 
x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = and <2 x i32> [[TMP3]], +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr addrspace(1) +; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP2]], i32 0, i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP6]], i32 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = load float, ptr addrspace(1) [[TMP8]], align 4 +; CHECK-NEXT: ret float [[TMP9]] ; %ptr0 = call ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32> %desc0) %ptr1 = call ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32> %desc1) @@ -83,19 +73,14 @@ define amdgpu_gfx float @divergent_select2(<4 x i32> inreg %desc0, <4 x i32> %de ; CHECK-NEXT: [[PTR_0:%.*]] = select i1 [[SEL:%.*]], <4 x i32> [[DESC0:%.*]], <4 x i32> [[DESC1:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[PTR_0]], i64 2 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 0, [[TMP1]] -; CHECK-NEXT: br i1 true, label [[TMP3:%.*]], label [[TMP11:%.*]] -; CHECK: 3: -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[PTR_0]], <4 x i32> poison, <2 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = and <2 x i32> [[TMP4]], -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to i64 -; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr addrspace(1) -; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP2]], i32 0, i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP7]], i32 [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = load float, ptr addrspace(1) [[TMP9]], align 4 -; CHECK-NEXT: br label [[TMP11]] -; CHECK: 11: -; CHECK-NEXT: [[NEWVALUE:%.*]] = phi float [ 0.000000e+00, [[TMP0:%.*]] ], [ [[TMP10]], [[TMP3]] ] -; CHECK-NEXT: ret float [[NEWVALUE]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[PTR_0]], <4 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = and <2 x i32> [[TMP3]], +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr 
addrspace(1) +; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP2]], i32 0, i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP6]], i32 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = load float, ptr addrspace(1) [[TMP8]], align 4 +; CHECK-NEXT: ret float [[TMP9]] ; %ptr0 = call ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32> %desc0) %ptr1 = call ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32> %desc1) @@ -148,19 +133,14 @@ define amdgpu_gfx float @divergent_input0_phi(<4 x i32> %desc0, <4 x i32> inreg ; CHECK-NEXT: [[PTR_0:%.*]] = phi <4 x i32> [ [[DESC0:%.*]], [[A]] ], [ [[DESC1:%.*]], [[B]] ] ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[PTR_0]], i64 2 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 0, [[TMP1]] -; CHECK-NEXT: br i1 true, label [[TMP3:%.*]], label [[TMP11:%.*]] -; CHECK: 3: -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[PTR_0]], <4 x i32> poison, <2 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = and <2 x i32> [[TMP4]], -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to i64 -; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr addrspace(1) -; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP2]], i32 0, i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP7]], i32 [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = load float, ptr addrspace(1) [[TMP9]], align 4 -; CHECK-NEXT: br label [[TMP11]] -; CHECK: 11: -; CHECK-NEXT: [[NEWVALUE:%.*]] = phi float [ 0.000000e+00, [[TAIL]] ], [ [[TMP10]], [[TMP3]] ] -; CHECK-NEXT: ret float [[NEWVALUE]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[PTR_0]], <4 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = and <2 x i32> [[TMP3]], +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr addrspace(1) +; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP2]], i32 0, i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP6]], i32 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = load float, ptr 
addrspace(1) [[TMP8]], align 4 +; CHECK-NEXT: ret float [[TMP9]] ; br i1 %sel, label %a, label %b @@ -189,19 +169,14 @@ define amdgpu_gfx float @divergent_input1_phi(<4 x i32> inreg %desc0, <4 x i32> ; CHECK-NEXT: [[PTR_0:%.*]] = phi <4 x i32> [ [[DESC0:%.*]], [[A]] ], [ [[DESC1:%.*]], [[B]] ] ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[PTR_0]], i64 2 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 0, [[TMP1]] -; CHECK-NEXT: br i1 true, label [[TMP3:%.*]], label [[TMP11:%.*]] -; CHECK: 3: -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[PTR_0]], <4 x i32> poison, <2 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = and <2 x i32> [[TMP4]], -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to i64 -; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr addrspace(1) -; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP2]], i32 0, i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP7]], i32 [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = load float, ptr addrspace(1) [[TMP9]], align 4 -; CHECK-NEXT: br label [[TMP11]] -; CHECK: 11: -; CHECK-NEXT: [[NEWVALUE:%.*]] = phi float [ 0.000000e+00, [[TAIL]] ], [ [[TMP10]], [[TMP3]] ] -; CHECK-NEXT: ret float [[NEWVALUE]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[PTR_0]], <4 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = and <2 x i32> [[TMP3]], +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr addrspace(1) +; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP2]], i32 0, i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP6]], i32 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = load float, ptr addrspace(1) [[TMP8]], align 4 +; CHECK-NEXT: ret float [[TMP9]] ; br i1 %sel, label %a, label %b @@ -230,19 +205,14 @@ define amdgpu_gfx float @divergent_sync_phi(<4 x i32> inreg %desc0, <4 x i32> in ; CHECK-NEXT: [[PTR_0:%.*]] = phi <4 x i32> [ [[DESC0:%.*]], [[A]] ], [ [[DESC1:%.*]], [[B]] ] ; CHECK-NEXT: [[TMP1:%.*]] = 
extractelement <4 x i32> [[PTR_0]], i64 2 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 0, [[TMP1]] -; CHECK-NEXT: br i1 true, label [[TMP3:%.*]], label [[TMP11:%.*]] -; CHECK: 3: -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[PTR_0]], <4 x i32> poison, <2 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = and <2 x i32> [[TMP4]], -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to i64 -; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr addrspace(1) -; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP2]], i32 0, i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP7]], i32 [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = load float, ptr addrspace(1) [[TMP9]], align 4 -; CHECK-NEXT: br label [[TMP11]] -; CHECK: 11: -; CHECK-NEXT: [[NEWVALUE:%.*]] = phi float [ 0.000000e+00, [[TAIL]] ], [ [[TMP10]], [[TMP3]] ] -; CHECK-NEXT: ret float [[NEWVALUE]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[PTR_0]], <4 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = and <2 x i32> [[TMP3]], +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr addrspace(1) +; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP2]], i32 0, i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP6]], i32 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = load float, ptr addrspace(1) [[TMP8]], align 4 +; CHECK-NEXT: ret float [[TMP9]] ; br i1 %sel, label %a, label %b diff --git a/lgc/test/Transforms/PatchBufferOp/uniform-phi.lgc b/lgc/test/Transforms/PatchBufferOp/uniform-phi.lgc index 9bdd634dfd..103daa3696 100644 --- a/lgc/test/Transforms/PatchBufferOp/uniform-phi.lgc +++ b/lgc/test/Transforms/PatchBufferOp/uniform-phi.lgc @@ -18,19 +18,14 @@ define amdgpu_gfx float @uniform_phi(<4 x i32> inreg %desc0, <4 x i32> inreg %de ; CHECK-NEXT: [[PTR_0:%.*]] = phi <4 x i32> [ [[DESC0:%.*]], [[A]] ], [ [[DESC1:%.*]], [[B]] ] ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[PTR_0]], i64 2 ; CHECK-NEXT: 
[[TMP2:%.*]] = icmp ult i32 0, [[TMP1]] -; CHECK-NEXT: br i1 true, label [[TMP3:%.*]], label [[TMP11:%.*]] -; CHECK: 3: -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[PTR_0]], <4 x i32> poison, <2 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = and <2 x i32> [[TMP4]], -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to i64 -; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr addrspace(1) -; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP2]], i32 0, i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP7]], i32 [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = load float, ptr addrspace(1) [[TMP9]], align 4 -; CHECK-NEXT: br label [[TMP11]] -; CHECK: 11: -; CHECK-NEXT: [[NEWVALUE:%.*]] = phi float [ 0.000000e+00, [[TAIL]] ], [ [[TMP10]], [[TMP3]] ] -; CHECK-NEXT: ret float [[NEWVALUE]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[PTR_0]], <4 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = and <2 x i32> [[TMP3]], +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr addrspace(1) +; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP2]], i32 0, i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP6]], i32 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = load float, ptr addrspace(1) [[TMP8]], align 4 +; CHECK-NEXT: ret float [[TMP9]] ; br i1 %sel, label %a, label %b diff --git a/lgc/test/UberFetchShader.lgc b/lgc/test/UberFetchShader.lgc index eae98f8d5d..eba122466e 100644 --- a/lgc/test/UberFetchShader.lgc +++ b/lgc/test/UberFetchShader.lgc @@ -2,7 +2,7 @@ ; CHECK-LABEL: define dllexport spir_func void @lgc.shader.VS.main() ; Get the descriptor of Uber Fetch Shader buffer -; CHECK: [[Desc:%[0-9]*]] = call i64 @lgc.load.user.data.i64(i32 4) +; CHECK: [[Desc:%[0-9]*]] = call i64 @lgc.load.user.data__i64(i32 4) ; CHECK: [[DESCPTR:%[0-9]*]] = inttoptr i64 [[Desc]] to ptr addrspace(4) ; location mask @@ -54,12 +54,12 @@ define dllexport spir_func void 
@lgc.shader.VS.main() local_unnamed_addr #0 !spirv.ExecutionModel !10 !lgc.shaderstage !11 { .entry: - %0 = call <4 x float> @lgc.input.import.generic.v4f32(i1 false, i32 0, i32 0, i32 0, i32 poison) #1 + %0 = call <4 x float> @lgc.input.import.generic__v4f32(i1 false, i32 0, i32 0, i32 0, i32 poison) #1 ret void } ; Function Attrs: nounwind readonly willreturn -declare <4 x float> @lgc.input.import.generic.v4f32(i1, i32, i32, i32, i32) #1 +declare <4 x float> @lgc.input.import.generic__v4f32(i1, i32, i32, i32, i32) #1 ; Function Attrs: nounwind declare void @lgc.output.export.builtin.CullDistance.i32.a1f32(i32, [1 x float]) #0 diff --git a/lgc/tool/lgc/CMakeLists.txt b/lgc/tool/lgc/CMakeLists.txt index cd4d5c2e6c..2f8a01aa17 100644 --- a/lgc/tool/lgc/CMakeLists.txt +++ b/lgc/tool/lgc/CMakeLists.txt @@ -45,7 +45,7 @@ add_llvm_tool(lgc # lgc is linked in separately to account for both static and dynamic library # builds. -llvm_map_components_to_libnames(extra_llvm_libs lgc CompilerUtils Continuations) +llvm_map_components_to_libnames(extra_llvm_libs lgc CompilerUtils Raytracing) target_link_libraries(lgc PRIVATE ${extra_llvm_libs}) target_compile_definitions(lgc PRIVATE ${TARGET_ARCHITECTURE_ENDIANESS}ENDIAN_CPU) diff --git a/lgc/util/Internal.cpp b/lgc/util/Internal.cpp index 5a9a24a81e..4aca887f26 100644 --- a/lgc/util/Internal.cpp +++ b/lgc/util/Internal.cpp @@ -234,4 +234,18 @@ Type *getVgprTy(Type *ty) { return ty; } +// ===================================================================================================================== +// Helper function to create LLVM Function and update NewDbgInfoFormat flag +llvm::Function *createFunctionHelper(llvm::FunctionType *ty, llvm::GlobalValue::LinkageTypes linkage, + llvm::Module *module, const llvm::Twine &name) { + + llvm::Function *func = Function::Create(ty, linkage, name); + +#if !defined(LLVM_MAIN_REVISION) || LLVM_MAIN_REVISION >= 489715 + func->setIsNewDbgInfoFormat(module->IsNewDbgInfoFormat); +#endif + 
+ return func; +} + } // namespace lgc diff --git a/llpc/CMakeLists.txt b/llpc/CMakeLists.txt index 10faa4a964..1a156b2c6b 100644 --- a/llpc/CMakeLists.txt +++ b/llpc/CMakeLists.txt @@ -56,7 +56,7 @@ if(ICD_BUILD_LLPC) set(LLVM_INCLUDE_DOCS OFF CACHE BOOL Force) set(LLVM_INCLUDE_EXAMPLES OFF CACHE BOOL Force) set(LLVM_INCLUDE_GO_TESTS OFF CACHE BOOL Force) - set(CONTINUATIONS_BUILD_TESTS ${LLPC_BUILD_TESTS}) + set(LLVMRAYTRACING_BUILD_TESTS ${LLPC_BUILD_TESTS}) set(LLVM_INCLUDE_TESTS ${LLPC_BUILD_TESTS} CACHE BOOL Force) set(LLVM_INCLUDE_TOOLS ON CACHE BOOL Force) set(LLVM_INCLUDE_UTILS ON CACHE BOOL Force) @@ -125,7 +125,7 @@ if(ICD_BUILD_LLPC) endif() # Always link statically against libLLVMlgc - llvm_map_components_to_libnames(extra_llvm_libs lgc Continuations) + llvm_map_components_to_libnames(extra_llvm_libs lgc Raytracing) if(NOT WIN32) foreach (lib ${extra_llvm_libs}) target_compile_options(${lib} PRIVATE "-fno-aligned-new") diff --git a/llpc/context/llpcCompiler.cpp b/llpc/context/llpcCompiler.cpp index 457419a2e0..a18669c8c7 100644 --- a/llpc/context/llpcCompiler.cpp +++ b/llpc/context/llpcCompiler.cpp @@ -80,7 +80,7 @@ // New version of the code (also handles unknown version, which we treat as latest) #include "llvm/IRPrinter/IRPrintingPasses.h" #endif -#include "continuations/GpurtContext.h" +#include "llvmraytracing/GpurtContext.h" #include "llvm/Linker/Linker.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Format.h" @@ -1153,6 +1153,8 @@ Result Compiler::buildGraphicsShaderStage(const GraphicsPipelineBuildInfo *pipel it = pipeNode.find(Vkgc::DiscardState); if (it != pipeNode.end()) discardState = it->second.getBool(); + } else { + report_fatal_error("Cannot emit llvm IR or bitcode with color export shader enabled"); } } @@ -1732,6 +1734,7 @@ Result Compiler::buildPipelineInternal(Context *context, ArrayRefusage.enableRayQuery) { assert(!moduleData->usage.rayQueryLibrary); + context->ensureGpurtLibrary(); 
lowerPassMgr->addPass(SpirvLowerRayQuery(false)); ++numStagesWithRayQuery; } @@ -1763,7 +1766,6 @@ Result Compiler::buildPipelineInternal(Context *context, ArrayRefensureGpurtLibrary(); setUseGpurt(&*pipeline); GpurtContext &gpurtContext = GpurtContext::get(*context); @@ -3005,9 +3007,14 @@ Result Compiler::buildRayTracingPipelineInternal(RayTracingContext &rtContext, // TODO: For continuations, we only need to compile the GpuRt module separately if there are TraceRay usages // to compile the Traversal shader. For callable shaders, it is not required. if (needTraversal) { - StringRef traceRayFuncName = mainContext->getPipelineContext()->getRayTracingFunctionName(Vkgc::RT_ENTRY_TRACE_RAY); - StringRef fetchTrianglePosFunc = mainContext->getPipelineContext()->getRayTracingFunctionName( - Vkgc::RT_ENTRY_FETCH_HIT_TRIANGLE_FROM_NODE_POINTER); + auto fetchRayTracingFuncName = [&](Vkgc::RAYTRACING_ENTRY_FUNC attribute) -> StringRef { + return mainContext->getPipelineContext()->getRayTracingFunctionName(attribute); + }; + StringRef traceRayFuncName = fetchRayTracingFuncName(Vkgc::RT_ENTRY_TRACE_RAY); + // For continuations, the entry is _cont_Traversal. + constexpr char ContTraceRayFuncName[] = "_cont_Traversal"; + if (continuationsMode) + traceRayFuncName = ContTraceRayFuncName; std::unique_ptr traversal = CloneModule(*gpurtContext.theModule); @@ -3020,16 +3027,9 @@ Result Compiler::buildRayTracingPipelineInternal(RayTracingContext &rtContext, func->setLinkage(GlobalValue::ExternalLinkage); lgc::rt::setLgcRtShaderStage(func, lgc::rt::RayTracingShaderStage::Traversal); } else if (func->getLinkage() == GlobalValue::WeakAnyLinkage && !func->empty()) { - // Preserve fetchTrianglePosFunc because we need to inline it into Traversal later on. - // Remove other function definitions both for compile speed, and to work around an - // issue with private globals used in multiple functions in GpuRt which confuses SpirvLowerGlobal. 
- bool isFetchTrianglePosFunc = func->getName().starts_with(fetchTrianglePosFunc); - bool isContinuationFunc = continuationsMode && func->getName().starts_with("_cont_"); - - if (!isFetchTrianglePosFunc && !isContinuationFunc) { - func->dropAllReferences(); - func->eraseFromParent(); - } + // Remove functions other than TraceRay entry, for traversal module we only need that. + func->dropAllReferences(); + func->eraseFromParent(); } } diff --git a/llpc/context/llpcContext.cpp b/llpc/context/llpcContext.cpp index 70e1cb2af7..4d70b5205a 100644 --- a/llpc/context/llpcContext.cpp +++ b/llpc/context/llpcContext.cpp @@ -30,8 +30,6 @@ */ #include "llpcContext.h" #include "SPIRVInternal.h" -#include "continuations/ContinuationsDialect.h" -#include "continuations/GpurtContext.h" #include "llpcCompiler.h" #include "llpcDebug.h" #include "llpcPipelineContext.h" @@ -43,6 +41,8 @@ #include "llpcSpirvLowerTranslator.h" #include "llpcSpirvProcessGpuRtLibrary.h" #include "llpcTimerProfiler.h" +#include "llvmraytracing/ContinuationsDialect.h" +#include "llvmraytracing/GpurtContext.h" #include "vkgcMetroHash.h" #include "lgc/Builder.h" #include "lgc/GpurtDialect.h" @@ -112,7 +112,8 @@ LgcContext *Context::getLgcContext() { if (!m_targetMachine) report_fatal_error(Twine("Unknown target '") + Twine(gpuName) + Twine("'")); m_builderContext.reset(LgcContext::create(&*m_targetMachine, *this, PAL_CLIENT_INTERFACE_MAJOR_VERSION)); - lgc::GpurtContext::get(*this).theModule.reset(); + lgc::GpurtContext::get(*this).theModule = nullptr; + lgc::GpurtContext::get(*this).ownedTheModule.reset(); // Pass the state of LLPC_OUTS on to LGC. LgcContext::setLlpcOuts(EnableOuts() ? 
&outs() : nullptr); @@ -219,8 +220,10 @@ void Context::ensureGpurtLibrary() { key.gpurtFeatureFlags = rtState->gpurtFeatureFlags; // gpurtFeatureFlags affect which GPURT library we're using key.hwIntersectRay = rtState->bvhResDesc.dataSizeInDwords > 0; - if (gpurtContext.theModule && key != m_currentGpurtKey) - gpurtContext.theModule.reset(); + if (gpurtContext.ownedTheModule && key != m_currentGpurtKey) { + gpurtContext.theModule = nullptr; + gpurtContext.ownedTheModule.reset(); + } if (gpurtContext.theModule) return; @@ -270,7 +273,8 @@ void Context::ensureGpurtLibrary() { lowerPassMgr->run(*gpurt); - gpurtContext.theModule = std::move(gpurt); + gpurtContext.ownedTheModule = std::move(gpurt); + gpurtContext.theModule = gpurtContext.ownedTheModule.get(); } } // namespace Llpc diff --git a/llpc/context/llpcGraphicsContext.cpp b/llpc/context/llpcGraphicsContext.cpp index 58480e7cfb..b371ce09f0 100644 --- a/llpc/context/llpcGraphicsContext.cpp +++ b/llpc/context/llpcGraphicsContext.cpp @@ -177,6 +177,7 @@ unsigned GraphicsContext::getSubgroupSizeUsage() const { void GraphicsContext::setPipelineState(Pipeline *pipeline, Util::MetroHash64 *hasher, bool unlinked) const { PipelineContext::setPipelineState(pipeline, hasher, unlinked); const unsigned stageMask = getShaderStageMask(); + bool disableDualSourceBlend = false; if (pipeline) { // Give the shader options (including the hash) to the middle-end. 
@@ -187,6 +188,10 @@ void GraphicsContext::setPipelineState(Pipeline *pipeline, Util::MetroHash64 *ha assert(shaderInfo); pipeline->setShaderOptions(getLgcShaderStage(static_cast(stage)), computeShaderOptions(*shaderInfo)); + + const ShaderModuleData *moduleData = reinterpret_cast(shaderInfo->pModuleData); + if (stage == ShaderStageFragment && moduleData && moduleData->usage.disableDualSource) + disableDualSourceBlend = true; } } @@ -198,7 +203,7 @@ void GraphicsContext::setPipelineState(Pipeline *pipeline, Util::MetroHash64 *ha if ((isShaderStageInMask(ShaderStageFragment, stageMask) && (!unlinked || DisableColorExportShader)) || (stageMask == 0)) { // Give the color export state to the middle-end. Empty stage mask indicates color export shader. - setColorExportState(pipeline, hasher); + setColorExportState(pipeline, hasher, disableDualSourceBlend); } // Give the graphics pipeline state to the middle-end. @@ -248,7 +253,7 @@ Options GraphicsContext::computePipelineOptions() const { options.enableUberFetchShader = pipelineInfo->enableUberFetchShader; options.enableColorExportShader = pipelineInfo->enableColorExportShader; options.useSoftwareVertexBufferDescriptors = pipelineInfo->useSoftwareVertexBufferDescriptors; - options.vbAddressLowBitsKnown = pipelineInfo->vbAddressLowBitsKnown; + options.vbAddressLowBitsKnown = pipelineInfo->getGlState().vbAddressLowBitsKnown; if (getGfxIpVersion().major >= 10) { // Only set NGG options for a GFX10+ graphics pipeline. 
const auto &nggState = pipelineInfo->nggState; @@ -296,7 +301,8 @@ Options GraphicsContext::computePipelineOptions() const { // // @param [in/out] pipeline : Middle-end pipeline object; nullptr if only hashing // @param [in/out] hasher : Hasher object; nullptr if only setting LGC pipeline state -void GraphicsContext::setColorExportState(Pipeline *pipeline, Util::MetroHash64 *hasher) const { +void GraphicsContext::setColorExportState(Pipeline *pipeline, Util::MetroHash64 *hasher, + bool disableDualSourceBlend) const { auto pipelineInfo = reinterpret_cast(getPipelineBuildInfo()); const auto &cbState = pipelineInfo->cbState; @@ -312,6 +318,10 @@ void GraphicsContext::setColorExportState(Pipeline *pipeline, Util::MetroHash64 state.dualSourceBlendEnable = cbState.dualSourceBlendEnable; state.dualSourceBlendDynamicEnable = cbState.dualSourceBlendDynamic; + // Update enable flag according to Shader Decoration + if (cbState.dualSourceBlendDynamic && disableDualSourceBlend) + state.dualSourceBlendDynamicEnable = false; + for (unsigned targetIndex = 0; targetIndex < MaxColorTargets; ++targetIndex) { if (cbState.target[targetIndex].format != VK_FORMAT_UNDEFINED) { auto dfmt = BufDataFormatInvalid; @@ -388,9 +398,10 @@ void GraphicsContext::setVertexInputDescriptions(Pipeline *pipeline, Util::Metro // Gather the vertex inputs. 
SmallVector descriptions; - auto vbLowBits = static_cast(getPipelineBuildInfo())->vbAddressLowBits; + auto vbLowBits = + static_cast(getPipelineBuildInfo())->getGlState().vbAddressLowBits; auto vbAddressLowBitsKnown = - static_cast(getPipelineBuildInfo())->vbAddressLowBitsKnown; + static_cast(getPipelineBuildInfo())->getGlState().vbAddressLowBitsKnown; for (unsigned i = 0; i < vertexInput->vertexAttributeDescriptionCount; ++i) { auto attrib = &vertexInput->pVertexAttributeDescriptions[i]; if (attrib->binding >= bindings.size()) diff --git a/llpc/context/llpcGraphicsContext.h b/llpc/context/llpcGraphicsContext.h index 9ff9ac8cd1..247cf318f2 100644 --- a/llpc/context/llpcGraphicsContext.h +++ b/llpc/context/llpcGraphicsContext.h @@ -87,7 +87,8 @@ class GraphicsContext : public PipelineContext { virtual lgc::Options computePipelineOptions() const override; // Give the color export state to the middle-end, and/or hash it. - void setColorExportState(lgc::Pipeline *pipeline, Util::MetroHash64 *hasher) const; + void setColorExportState(lgc::Pipeline *pipeline, Util::MetroHash64 *hasher, + bool disableDualSourceBlend = false) const; // Set vertex input descriptions in middle-end Pipeline, and/or hash them. void setVertexInputDescriptions(lgc::Pipeline *pipeline, Util::MetroHash64 *hasher) const; diff --git a/llpc/context/llpcRayTracingContext.h b/llpc/context/llpcRayTracingContext.h index 5b4eaa0db5..d95f35eab0 100644 --- a/llpc/context/llpcRayTracingContext.h +++ b/llpc/context/llpcRayTracingContext.h @@ -117,6 +117,7 @@ class RayTracingContext : public PipelineContext { unsigned hasLibraryStage(unsigned stageMask) { return m_pipelineInfo->pipelineLibStageMask & stageMask; } bool isReplay() { return m_pipelineInfo->isReplay; } Vkgc::LlpcRaytracingMode getRaytracingMode() { return m_pipelineInfo->mode; } + unsigned getCpsFlag() { return m_pipelineInfo->cpsFlags; } protected: // Give the pipeline options to the middle-end, and/or hash them. 
diff --git a/llpc/docs/DdnPackShaderInputOutput.md b/llpc/docs/DdnPackShaderInputOutput.md index 2c9db7fa54..4fb52204ea 100644 --- a/llpc/docs/DdnPackShaderInputOutput.md +++ b/llpc/docs/DdnPackShaderInputOutput.md @@ -7,8 +7,8 @@ This DDN introduces the workflow change of packing in/out in VS/FS pipeline in L VS/FS is the most popular pipeline. To reduce risk, we support packing for VS/FS pipeline as the goal of phase 1. Considering component-based interpolation of the input of fragment shader, we adopt the idea of vector scalarization and then re-assembling vectors to achieve the purpose of packing. We use cl::PackInOut as a global switch control. In the LLPC middle-end, fragment shader inputs are represented by two kinds of intrinsic: ``` -@lgc.input.import.generic.%Type%(i32 location, i32 elemIdx, i32 interpMode, i32 interpLoc) -@lgc.input.import.interpolant.%Type%(i32 location, i32 locOffset, i32 elemIdx, i32 interpMode, T auxInterValue) +@lgc.input.import.generic__%Type%(i32 location, i32 elemIdx, i32 interpMode, i32 interpLoc) +@lgc.input.import.interpolant__%Type%(i32 location, i32 locOffset, i32 elemIdx, i32 interpMode, T auxInterValue) ``` ## 3 Interface change No interface change. 
diff --git a/llpc/lower/PrepareContinuations.cpp b/llpc/lower/PrepareContinuations.cpp index 8f03bc97c2..b03e24c4dd 100644 --- a/llpc/lower/PrepareContinuations.cpp +++ b/llpc/lower/PrepareContinuations.cpp @@ -30,8 +30,9 @@ */ #include "PrepareContinuations.h" #include "compilerutils/CompilerUtils.h" -#include "continuations/GpurtContext.h" #include "llpcContext.h" +#include "llvmraytracing/ContinuationsUtil.h" +#include "llvmraytracing/GpurtContext.h" #include "lgc/Builder.h" #define DEBUG_TYPE "prepare-continuations" @@ -57,7 +58,9 @@ PreservedAnalyses PrepareContinuations::run(Module &module, ModuleAnalysisManage mode.workgroupSizeX = rtState->threadGroupSizeX; mode.workgroupSizeY = rtState->threadGroupSizeY; mode.workgroupSizeZ = rtState->threadGroupSizeZ; + mode.noLocalInvocationIdInCalls = true; Pipeline::setComputeShaderMode(module, mode); + module.getOrInsertNamedMetadata(ContHelper::MDLgcCpsModuleName); if (module.getName().starts_with("main")) { m_shaderStage = ShaderStageRayTracingRayGen; diff --git a/llpc/lower/llpcSpirvLowerCooperativeMatrix.cpp b/llpc/lower/llpcSpirvLowerCooperativeMatrix.cpp index 0ecd2fb6b4..8fbb0ec3df 100644 --- a/llpc/lower/llpcSpirvLowerCooperativeMatrix.cpp +++ b/llpc/lower/llpcSpirvLowerCooperativeMatrix.cpp @@ -35,6 +35,7 @@ #include "llpcSpirvLowerCooperativeMatrix.h" #include "llpcDialect.h" #include "lgc/BuilderCommon.h" +#include "lgc/LgcDialect.h" #include "llvm/IR/Instructions.h" #define DEBUG_TYPE "llpc-spirv-lower-cooperative-matrix" @@ -55,8 +56,8 @@ class LowerCooperativeMatrix { private: void visitProxy(CallInst &call); - void visitPointerUsers(Value *ptr, BuilderCommon::CooperativeMatrixElementType elemTypeEnum, - BuilderCommon::CooperativeMatrixLayout layout, Type *elemType, Value *matrixPtr, Value *index); + void visitPointerUsers(Value *ptr, CooperativeMatrixElementType elemTypeEnum, CooperativeMatrixLayout layout, + Type *elemType, Value *matrixPtr, Value *index); Module &m_module; BuilderCommon m_builder; @@ 
-95,10 +96,9 @@ PreservedAnalyses LowerCooperativeMatrix::run() { // @returns true if a change was made void LowerCooperativeMatrix::visitProxy(CallInst &call) { Value *ptr = call.getArgOperand(0); - auto elemTypeEnum = - (BuilderCommon::CooperativeMatrixElementType)(cast(call.getArgOperand(1))->getZExtValue()); + auto elemTypeEnum = (CooperativeMatrixElementType)(cast(call.getArgOperand(1))->getZExtValue()); Type *elemType = m_builder.transCooperativeMatrixElementType(elemTypeEnum); - auto layout = (BuilderCommon::CooperativeMatrixLayout)(cast(call.getArgOperand(2))->getZExtValue()); + auto layout = (CooperativeMatrixLayout)(cast(call.getArgOperand(2))->getZExtValue()); m_toDelete.push_back(&call); visitPointerUsers(&call, elemTypeEnum, layout, elemType, ptr, m_builder.getInt32(0)); @@ -112,9 +112,9 @@ void LowerCooperativeMatrix::visitProxy(CallInst &call) { // @param layout : the matrix layout // @param matrixPtr : the pointer to the underlying proxied matrix // @param index : the 32-bit index of the matrix that @p ptr points to -void LowerCooperativeMatrix::visitPointerUsers(Value *ptr, BuilderCommon::CooperativeMatrixElementType elemTypeEnum, - BuilderCommon::CooperativeMatrixLayout layout, Type *elemType, - Value *matrixPtr, Value *index) { +void LowerCooperativeMatrix::visitPointerUsers(Value *ptr, CooperativeMatrixElementType elemTypeEnum, + CooperativeMatrixLayout layout, Type *elemType, Value *matrixPtr, + Value *index) { for (User *user : ptr->users()) { Instruction *inst = cast(user); m_builder.SetInsertPoint(inst); diff --git a/llpc/lower/llpcSpirvLowerGlobal.cpp b/llpc/lower/llpcSpirvLowerGlobal.cpp index 39ce3d3f8d..b6e4ee24f3 100644 --- a/llpc/lower/llpcSpirvLowerGlobal.cpp +++ b/llpc/lower/llpcSpirvLowerGlobal.cpp @@ -209,33 +209,37 @@ PreservedAnalyses SpirvLowerGlobal::run(Module &module, ModuleAnalysisManager &a changeRtFunctionSignature(); - // Map globals to proxy variables - for (auto global = m_module->global_begin(), end = 
m_module->global_end(); global != end; ++global) { - if (global->getType()->getAddressSpace() == SPIRAS_Private) - mapGlobalVariableToProxy(&*global); - else if (global->getType()->getAddressSpace() == SPIRAS_Input) - mapInputToProxy(&*global); - else if (global->getType()->getAddressSpace() == SPIRAS_Output) - mapOutputToProxy(&*global); - } - - // NOTE: Global variable, include general global variable, input and output is a special constant variable, so if - // it is referenced by constant expression, we need translate constant expression to normal instruction first, - // Otherwise, we will hit assert in replaceAllUsesWith() when we replace global variable with proxy variable. + // First pass over globals for (GlobalVariable &global : m_module->globals()) { auto addrSpace = global.getType()->getAddressSpace(); - // Remove constant expressions for global variables in these address spaces - bool isGlobalVar = addrSpace == SPIRAS_Private || addrSpace == SPIRAS_Input || addrSpace == SPIRAS_Output; + if (addrSpace == SPIRAS_Private || addrSpace == SPIRAS_Input || addrSpace == SPIRAS_Output) { + // Remove constant indexing expression and remove any proxy variables that are needed. (But the proxies aren't + // used yet for inputs/outputs.) 
+ removeConstantExpr(m_context, &global); + + if (addrSpace == SPIRAS_Private) + mapGlobalVariableToProxy(&global); + else if (addrSpace == SPIRAS_Input) + mapInputToProxy(&global); + else if (addrSpace == SPIRAS_Output) + mapOutputToProxy(&global); + } else if (addrSpace == SPIRAS_Local) { + // Prefix all LDS variables to avoid downstream conflicts when linking shaders together + if (global.hasName()) { + global.setName(Twine("lds_") + getShaderStageName(m_shaderStage) + "_" + global.getName()); + } + } + } - if (!isGlobalVar) - continue; - removeConstantExpr(m_context, &global); + // Remove global variables that were already fully replaced + for (auto globalVar : m_globalsToErase) { + globalVar->dropAllReferences(); + globalVar->eraseFromParent(); } + m_globalsToErase.clear(); // Do lowering operations - lowerGlobalVar(); - if (m_lowerInputInPlace && m_lowerOutputInPlace) { // Both input and output have to be lowered in-place (without proxy variables) lowerInOutInPlace(); // Just one lowering operation is sufficient @@ -362,6 +366,12 @@ void SpirvLowerGlobal::handleCallInst(bool checkEmitCall, bool checkInterpCall) } else if (mangledName.starts_with(gSPIRVName::InterpolateAtOffset)) { interpLoc = InterpLocCenter; auxInterpValue = callInst->getArgOperand(1); // Offset from pixel center + auto info = static_cast(m_context->getPipelineBuildInfo()); + if (info->getGlState().originUpperLeft) { + auto yInvertOffset = m_builder->CreateExtractElement(auxInterpValue, 1); + yInvertOffset = m_builder->CreateFNeg(yInvertOffset); + auxInterpValue = m_builder->CreateInsertElement(auxInterpValue, yInvertOffset, 1); + } } else { assert(mangledName.starts_with(gSPIRVName::InterpolateAtVertexAMD)); interpLoc = InterpLocCustom; @@ -590,7 +600,6 @@ void SpirvLowerGlobal::mapGlobalVariableToProxy(GlobalVariable *globalVar) { Type *globalVarTy = globalVar->getValueType(); Value *proxy = nullptr; - removeConstantExpr(m_context, globalVar); // Handle special globals, regular allocas 
will be removed by SROA pass. if (globalVar->getName().starts_with(RtName::HitAttribute)) { proxy = m_entryPoint->getArg(1); @@ -625,7 +634,7 @@ void SpirvLowerGlobal::mapGlobalVariableToProxy(GlobalVariable *globalVar) { } } - m_globalVarProxy.insert(globalVar); + m_globalsToErase.push_back(globalVar); } // ===================================================================================================================== @@ -699,22 +708,6 @@ void SpirvLowerGlobal::mapOutputToProxy(GlobalVariable *output) { m_outputProxyMap.emplace_back(output, proxy); } -// ===================================================================================================================== -// Does lowering operations for SPIR-V global variables, replaces global variables with proxy variables. -void SpirvLowerGlobal::lowerGlobalVar() { - if (m_globalVarProxy.empty()) { - // Skip lowering if there is no global variable - return; - } - - // remove global variables - for (auto globalVar : m_globalVarProxy) { - globalVar->dropAllReferences(); - globalVar->eraseFromParent(); - } - m_globalVarProxy.clear(); -} - // ===================================================================================================================== // Does lowering operations for SPIR-V inputs, replaces inputs with proxy variables. void SpirvLowerGlobal::lowerInput() { @@ -987,7 +980,9 @@ Value *SpirvLowerGlobal::createRaytracingBuiltIn(BuiltIn builtIn) { case BuiltInRayTmaxKHR: return m_builder->create(); case BuiltInInstanceCustomIndexKHR: - return m_builder->create(); + // Note: GPURT(HLSL) has just the opposite naming of index/ID compares to SPIR-V. For dialect calls, we use + // GPURT-style. 
+ return m_builder->create(); case BuiltInObjectToWorldKHR: return m_builder->create(); case BuiltInWorldToObjectKHR: @@ -1001,7 +996,9 @@ Value *SpirvLowerGlobal::createRaytracingBuiltIn(BuiltIn builtIn) { case BuiltInRayGeometryIndexKHR: return m_builder->create(); case BuiltInInstanceId: - return m_builder->create(); + // Note: GPURT(HLSL) has just the opposite naming of index/ID compares to SPIR-V. For dialect calls, we use + // GPURT-style. + return m_builder->create(); case BuiltInPrimitiveId: return m_builder->create(); case BuiltInCullMaskKHR: @@ -1213,7 +1210,7 @@ Value *SpirvLowerGlobal::addCallInstForInOutImport(Type *inOutTy, unsigned addrS inOutValue = m_builder->CreateExtractElement(inOutValue, uint64_t(0)); } else if (builtIn == lgc::BuiltInFragCoord) { auto buildInfo = static_cast(m_context->getPipelineBuildInfo()); - if (buildInfo->originUpperLeft != + if (buildInfo->getGlState().originUpperLeft != static_cast(buildInfo->fs.pModuleData)->usage.originUpperLeft) { unsigned offset = 0; auto winSize = getUniformConstantEntryByLocation(m_context, m_shaderStage, @@ -1245,6 +1242,12 @@ Value *SpirvLowerGlobal::addCallInstForInOutImport(Type *inOutTy, unsigned addrS } } } else { + lgc::InOutInfo inOutInfo; + inOutInfo.setComponent(inOutMeta.Component); + // Specify NumComponents if components are dynamically indexed + if (elemIdx && !isa(elemIdx)) + inOutInfo.setNumComponents(inOutMeta.NumComponents); + unsigned idx = inOutMeta.Component; assert(inOutMeta.Component <= 3); if (inOutTy->getScalarSizeInBits() == 64) { @@ -1253,9 +1256,6 @@ Value *SpirvLowerGlobal::addCallInstForInOutImport(Type *inOutTy, unsigned addrS } elemIdx = !elemIdx ? 
m_builder->getInt32(idx) : m_builder->CreateAdd(elemIdx, m_builder->getInt32(idx)); - lgc::InOutInfo inOutInfo; - inOutInfo.setComponent(inOutMeta.Component); - if (!locOffset) locOffset = m_builder->getInt32(0); @@ -1430,6 +1430,10 @@ void SpirvLowerGlobal::addCallInstForOutputExport(Value *outputValue, Constant * } m_builder->CreateWriteBuiltInOutput(outputValue, builtInId, outputInfo, vertexOrPrimitiveIdx, elemIdx); return; + } else { + // Specify NumComponents if components are dynamically indexed + if (elemIdx && !isa(elemIdx)) + outputInfo.setNumComponents(outputMeta.NumComponents); } unsigned location = outputMeta.Value + outputMeta.Index; @@ -2362,8 +2366,8 @@ void SpirvLowerGlobal::interpolateInputElement(unsigned interpLoc, Value *auxInt // Fill the XFB info map from the Vkgc::ApiXfbOutData if XFB is specified by API interface void SpirvLowerGlobal::buildApiXfbMap() { auto pipelineBuildInfo = static_cast(m_context->getPipelineBuildInfo()); - for (unsigned idx = 0; idx < pipelineBuildInfo->apiXfbOutData.numXfbOutInfo; ++idx) { - const auto &xfbInfo = pipelineBuildInfo->apiXfbOutData.pXfbOutInfos[idx]; + for (unsigned idx = 0; idx < pipelineBuildInfo->getGlState().apiXfbOutData.numXfbOutInfo; ++idx) { + const auto &xfbInfo = pipelineBuildInfo->getGlState().apiXfbOutData.pXfbOutInfos[idx]; unsigned location = xfbInfo.location; if (xfbInfo.isBuiltIn) { if (m_builtInXfbMap.find(location) == m_builtInXfbMap.end()) { diff --git a/llpc/lower/llpcSpirvLowerGlobal.h b/llpc/lower/llpcSpirvLowerGlobal.h index 6cf2efe167..1ca6cd6ade 100644 --- a/llpc/lower/llpcSpirvLowerGlobal.h +++ b/llpc/lower/llpcSpirvLowerGlobal.h @@ -67,7 +67,6 @@ class SpirvLowerGlobal : public SpirvLower, public llvm::PassInfoMixin m_globalVarProxy; // The unordered_set for lowering global variables + llvm::SmallVector m_globalsToErase; std::unordered_map m_inputProxyMap; // Proxy map for lowering inputs // NOTE: Here we use list to store pairs of output proxy mappings. 
This is because we want output patching to be diff --git a/llpc/lower/llpcSpirvLowerMath.cpp b/llpc/lower/llpcSpirvLowerMath.cpp index e46e293bfd..a9eb91a5c0 100644 --- a/llpc/lower/llpcSpirvLowerMath.cpp +++ b/llpc/lower/llpcSpirvLowerMath.cpp @@ -39,7 +39,9 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils.h" @@ -51,6 +53,7 @@ using namespace lgc; using namespace llvm; +using namespace PatternMatch; using namespace SPIRV; using namespace Llpc; @@ -457,7 +460,6 @@ void SpirvLowerMathFloatOp::visitBinaryOperator(BinaryOperator &binaryOp) { isa(src1) || (isa(src1) && cast(src1)->isZero()); bool src2IsConstZero = isa(src2) || (isa(src2) && cast(src2)->isZero()); - Value *dest = nullptr; if (opCode == Instruction::FSub && src1IsConstZero) { // NOTE: Source1 is constant zero, we might be performing FNEG operation. This will be optimized @@ -468,6 +470,7 @@ void SpirvLowerMathFloatOp::visitBinaryOperator(BinaryOperator &binaryOp) { // NOTE: We can't do constant folding for the following floating operations if we have floating-point controls that // will flush denormals or preserve NaN. 
if (!m_fp16DenormFlush && !m_fp32DenormFlush && !m_fp64DenormFlush) { + Value *dest = nullptr; switch (opCode) { case Instruction::FAdd: if (binaryOp.getFastMathFlags().noNaNs()) { @@ -507,11 +510,12 @@ void SpirvLowerMathFloatOp::visitBinaryOperator(BinaryOperator &binaryOp) { binaryOp.eraseFromParent(); m_changed = true; + return; } } // Replace FDIV x, y with FDIV 1.0, y; MUL x if it isn't optimized - if (opCode == Instruction::FDiv && !dest && src1 && src2) { + if (opCode == Instruction::FDiv) { Constant *one = ConstantFP::get(binaryOp.getType(), 1.0); if (src1 != one) { IRBuilder<> builder(*m_context); @@ -525,6 +529,39 @@ void SpirvLowerMathFloatOp::visitBinaryOperator(BinaryOperator &binaryOp) { binaryOp.eraseFromParent(); m_changed = true; + return; + } + } + + // Replace mul with amdgcn_fmul_legacy intrinsic when detect patterns like: + // ((b==0.0 ? 0.0 : a) * (a==0.0 ? 0.0 : b)) + if (opCode == Instruction::FMul) { + Value *src1CmpValue = nullptr; + Value *src1FalseValue = nullptr; + Value *src2CmpValue = nullptr; + Value *src2FalseValue = nullptr; + FCmpInst::Predicate pred = FCmpInst::FCMP_OEQ; + // Detect whether A = (b==0.0 ? 0.0 : a) and parse out b and a + bool src1Match = + match(src1, m_Select(m_FCmp(pred, m_Value(src1CmpValue), m_AnyZeroFP()), m_Zero(), m_Value(src1FalseValue))); + // Detect whether B = (a'==0.0 ? 
0.0 : b') and output a' and b' + bool src2Match = + match(src2, m_Select(m_FCmp(pred, m_Value(src2CmpValue), m_AnyZeroFP()), m_Zero(), m_Value(src2FalseValue))); + // If b == b' && a == a' then use fmul_legacy(a,b) instead of fmul(A,B) + if (src1Match && src2Match) { + if ((src1CmpValue == src2FalseValue) && (src2CmpValue == src1FalseValue)) { + IRBuilder<> builder(*m_context); + builder.SetInsertPoint(&binaryOp); + builder.setFastMathFlags(binaryOp.getFastMathFlags()); + Value *fmulzResult = + builder.CreateIntrinsic(Intrinsic::amdgcn_fmul_legacy, {}, {src1FalseValue, src2FalseValue}); + binaryOp.replaceAllUsesWith(fmulzResult); + binaryOp.dropAllReferences(); + binaryOp.eraseFromParent(); + + m_changed = true; + return; + } } } } diff --git a/llpc/lower/llpcSpirvLowerRayQuery.cpp b/llpc/lower/llpcSpirvLowerRayQuery.cpp index 8cc360eaeb..48eded8142 100644 --- a/llpc/lower/llpcSpirvLowerRayQuery.cpp +++ b/llpc/lower/llpcSpirvLowerRayQuery.cpp @@ -31,10 +31,13 @@ #include "llpcSpirvLowerRayQuery.h" #include "SPIRVInternal.h" +#include "compilerutils/CompilerUtils.h" #include "llpcContext.h" #include "llpcSpirvLowerUtil.h" +#include "llvmraytracing/GpurtContext.h" #include "lgc/Builder.h" #include "lgc/GpurtDialect.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/IntrinsicsAMDGPU.h" @@ -43,6 +46,7 @@ using namespace spv; using namespace llvm; using namespace Llpc; +using namespace CompilerUtils; namespace SPIRV { extern const char *MetaNameSpirvOp; @@ -52,7 +56,6 @@ namespace RtName { const char *LdsUsage = "LdsUsage"; const char *PrevRayQueryObj = "PrevRayQueryObj"; const char *RayQueryObjGen = "RayQueryObjGen"; -static const char *FetchTrianglePositionFromRayQuery = "FetchTrianglePositionFromRayQuery"; } // namespace RtName // Enum for the RayDesc @@ -291,6 +294,8 @@ SpirvLowerRayQuery::SpirvLowerRayQuery(bool rayQueryLibrary) // @param [in/out] module : LLVM module to be run on // @param [in/out] analysisManager : Analysis 
manager to use for this transformation PreservedAnalyses SpirvLowerRayQuery::run(Module &module, ModuleAnalysisManager &analysisManager) { + m_crossModuleInliner = std::make_optional(); + auto onExit = make_scope_exit([&] { m_crossModuleInliner.reset(); }); LLVM_DEBUG(dbgs() << "Run the pass Spirv-Lower-ray-query\n"); SpirvLower::init(&module); createGlobalRayQueryObj(); @@ -688,7 +693,7 @@ void SpirvLowerRayQuery::createRayQueryFuncCreateRet(instanceIndex); } @@ -708,7 +713,7 @@ template <> void SpirvLowerRayQuery::createRayQueryFuncCreateRet(instanceId); } @@ -1019,10 +1024,9 @@ void SpirvLowerRayQuery::createIntersectMatrix(Function *func, unsigned builtInI auto committedInstanceNodePtr = m_builder->CreateExtractValue(committed, RaySystemParams::InstanceNodePtr); Value *instanceNodePtr = m_builder->CreateSelect(intersect, committedInstanceNodePtr, candidateInstanceNodePtr); Value *instanceNodeAddr = createGetInstanceNodeAddr(instanceNodePtr, rayQuery); - Value *instanceId = createLoadInstanceIndex(instanceNodeAddr); Instruction *brInst = m_builder->CreateBr(endBlock); - Value *matrix = createTransformMatrix(builtInId, accelStruct, instanceId, brInst); + Value *matrix = createTransformMatrix(builtInId, instanceNodeAddr, brInst); m_builder->SetInsertPoint(endBlock); m_builder->CreateRet(matrix); } @@ -1052,6 +1056,12 @@ void SpirvLowerRayQuery::createRayQueryFuncaddFnAttr(Attribute::AlwaysInline); BasicBlock *entryBlock = BasicBlock::Create(*m_context, ".entry", func); m_builder->SetInsertPoint(entryBlock); + + // Cross module inliner cannot be used to inline a function with multiple blocks into in a degenerate block, create + // a temporary terminator first. 
+ auto tempTerminator = m_builder->CreateUnreachable(); + m_builder->SetInsertPoint(tempTerminator); + Value *rayQuery = func->arg_begin(); Value *intersectVal = func->arg_begin() + 1; Value *intersectPtr = m_builder->CreateAlloca(m_builder->getInt32Ty()); @@ -1060,10 +1070,12 @@ void SpirvLowerRayQuery::createRayQueryFuncgetFloatTy(), 3); - auto triangleDataTy = StructType::get(*m_context, {floatx3Ty, floatx3Ty, floatx3Ty}); - auto triangleData = - m_builder->CreateNamedCall(RtName::FetchTrianglePositionFromRayQuery, triangleDataTy, {rayQuery, intersectPtr}, - {Attribute::NoUnwind, Attribute::AlwaysInline}); + auto triangleData = m_crossModuleInliner.value() + .inlineCall(*m_builder, + getGpurtFunction(m_context->getPipelineContext()->getRayTracingFunctionName( + Vkgc::RT_ENTRY_FETCH_HIT_TRIANGLE_FROM_RAY_QUERY)), + {rayQuery, intersectPtr}) + .returnValue; // Return type of OpRayQueryGetIntersectionTriangleVertexPositionsKHR is array of vec3 (vec3[3]). auto retType = ArrayType::get(floatx3Ty, 3); @@ -1071,6 +1083,7 @@ void SpirvLowerRayQuery::createRayQueryFuncCreateInsertValue(ret, m_builder->CreateExtractValue(triangleData, {i}), {i}); m_builder->CreateRet(ret); + tempTerminator->eraseFromParent(); } // ===================================================================================================================== @@ -1183,61 +1196,15 @@ unsigned SpirvLowerRayQuery::getFuncOpcode(Function *func) { } // ===================================================================================================================== -// Create WorldToObject/ObjectToWorld Matrix by given instance ID +// Create WorldToObject/ObjectToWorld Matrix by GpuRt Library Func. 
// // @param builtInId : ID of the built-in variable -// @param accelStruct : Top accelerate structure -// @param instanceId : Instance ID +// @param instanceNodeAddr : instanceNode Address // @param insertPos : Where to insert instructions -Value *SpirvLowerRayQuery::createTransformMatrix(unsigned builtInId, Value *accelStruct, Value *instanceId, - Instruction *insertPos) { +Value *SpirvLowerRayQuery::createTransformMatrix(unsigned builtInId, Value *instanceNodeAddr, Instruction *insertPos) { assert(builtInId == BuiltInWorldToObjectKHR || builtInId == BuiltInObjectToWorldKHR); m_builder->SetInsertPoint(insertPos); - Value *zero = m_builder->getInt32(0); - - // offsetof(AccelStructHeader, dataOffsets) + offsetof(AccelStructOffsets, leafNodes) - unsigned instanceNodeOffset = offsetof(AccelStructHeader, dataOffsets) + offsetof(ResultDataOffsets, leafNodes); - Value *instanceNodeOffsetVal = m_builder->getInt32(instanceNodeOffset); - - auto int32x2Ty = FixedVectorType::get(m_builder->getInt32Ty(), 2); - - instanceNodeOffsetVal = - m_builder->CreateInsertElement(PoisonValue::get(int32x2Ty), instanceNodeOffsetVal, uint64_t(0)); - - instanceNodeOffsetVal = m_builder->CreateInsertElement(instanceNodeOffsetVal, zero, 1); - Value *instanceNodeOffsetAddr = m_builder->CreateAdd(accelStruct, instanceNodeOffsetVal); - - // Bitcast instanceNodeOffsetAddr to i64 integer - instanceNodeOffsetAddr = m_builder->CreateBitCast(instanceNodeOffsetAddr, m_builder->getInt64Ty()); - Type *gpuAddrAsPtrTy = PointerType::get(*m_context, SPIRAS_Global); - auto instNodeOffsetAddrAsPtr = m_builder->CreateIntToPtr(instanceNodeOffsetAddr, gpuAddrAsPtrTy); - Value *baseInstOffset = m_builder->CreateConstGEP1_32(m_builder->getInt8Ty(), instNodeOffsetAddrAsPtr, 0); - Type *baseInstOffsetTy = m_builder->getInt32Ty()->getPointerTo(SPIRAS_Global); - - // Load base instance offset from InstanceNodeOffsetAddr - baseInstOffset = m_builder->CreateBitCast(baseInstOffset, baseInstOffsetTy); - baseInstOffset = 
m_builder->CreateLoad(m_builder->getInt32Ty(), baseInstOffset); - - // Instance node includes the instance descriptor (64-bytes) followed by the extra instance node - // data (64-bytes). - Value *instanceNodeStrideShift = m_builder->getInt32(7); - - // Offset into the instance node - instanceId = m_builder->CreateShl(instanceId, instanceNodeStrideShift); - Value *matrixOffset = m_builder->CreateAdd(baseInstOffset, instanceId); - - if (builtInId == BuiltInObjectToWorldKHR) { - // The ObjectToWorld transform is at a 80 byte offset within the extra data structure - Value *transformOffset = m_builder->getInt32(80); - matrixOffset = m_builder->CreateAdd(matrixOffset, transformOffset); - } - - Value *vecMatrixOffset = PoisonValue::get(int32x2Ty); - vecMatrixOffset = m_builder->CreateInsertElement(vecMatrixOffset, matrixOffset, uint64_t(0)); - vecMatrixOffset = m_builder->CreateInsertElement(vecMatrixOffset, zero, 1); - Value *matrixAddr = m_builder->CreateAdd(accelStruct, vecMatrixOffset); - - return createLoadMatrixFromAddr(matrixAddr); + return createLoadMatrixFromFunc(instanceNodeAddr, builtInId); } // ===================================================================================================================== @@ -1272,109 +1239,71 @@ bool SpirvLowerRayQuery::stageNotSupportLds(ShaderStage stage) { } // ===================================================================================================================== -// Create instructions to load instance index given the 64-bit instance node address at the current insert point -// +// Create instructions to load instance index/id given the 64-bit instance node address at the current insert point +// Note: HLSL has just the opposite naming of index/ID compares to SPIR-V. 
+// So "isIndex = true" means we use InstanceId(InstanceIndex for GPURT) for vulkan, +// and "isIndex = false" means we use InstanceIndex(InstanceId for GPURT) for vulkan, // @param instNodeAddr : 64-bit instance node address, in <2 x i32> -Value *SpirvLowerRayQuery::createLoadInstanceIndex(Value *instNodeAddr) { - Value *zero = m_builder->getInt32(0); - Type *gpuAddrAsPtrTy = PointerType::get(*m_context, SPIRAS_Global); - auto int32x2Ty = FixedVectorType::get(m_builder->getInt32Ty(), 2); +Value *SpirvLowerRayQuery::createLoadInstanceIndexOrId(Value *instNodeAddr, bool isIndex) { + Value *instanceIdPtr = m_builder->CreateAllocaAtFuncEntry(m_builder->getInt64Ty()); + m_builder->CreateStore(instNodeAddr, instanceIdPtr); - const unsigned instanceIndexOffset = offsetof(RayTracingInstanceNode, extra.instanceIndex); + StringRef getterName = isIndex + ? m_context->getPipelineContext()->getRayTracingFunctionName(Vkgc::RT_ENTRY_INSTANCE_INDEX) + : m_context->getPipelineContext()->getRayTracingFunctionName(Vkgc::RT_ENTRY_INSTANCE_ID); - Value *instanceIndexOffsetVar = PoisonValue::get(int32x2Ty); - instanceIndexOffsetVar = - m_builder->CreateInsertElement(instanceIndexOffsetVar, m_builder->getInt32(instanceIndexOffset), uint64_t(0)); - instanceIndexOffsetVar = m_builder->CreateInsertElement(instanceIndexOffsetVar, zero, 1); - Value *instanceIndexAddr = m_builder->CreateAdd(instNodeAddr, instanceIndexOffsetVar); + auto cmiResult = m_crossModuleInliner.value().inlineCall(*m_builder, getGpurtFunction(getterName), {instanceIdPtr}); - instanceIndexAddr = m_builder->CreateBitCast(instanceIndexAddr, m_builder->getInt64Ty()); - auto instanceIndexAddrAsPtr = m_builder->CreateIntToPtr(instanceIndexAddr, gpuAddrAsPtrTy); - auto loadValue = m_builder->CreateConstGEP1_32(m_builder->getInt8Ty(), instanceIndexAddrAsPtr, 0); - loadValue = m_builder->CreateBitCast(loadValue, PointerType::get(*m_context, SPIRAS_Global)); - - return m_builder->CreateLoad(m_builder->getInt32Ty(), loadValue); + 
return cmiResult.returnValue; } // ===================================================================================================================== -// Create instructions to get instance node address given the instance node pointer at the current insert point +// Call GpuRt Library to get instance node address given the instance node pointer at the current +// insert point // // @param instNodePtr : Instance node pointer -// @param rayQuery : Ray query structure Value *SpirvLowerRayQuery::createGetInstanceNodeAddr(Value *instNodePtr, Value *rayQuery) { - auto int32x2Ty = FixedVectorType::get(m_builder->getInt32Ty(), 2); - Value *zero = m_builder->getInt32(0); - Value *BvhAddrLo = m_builder->CreateExtractValue(rayQuery, RayQueryParams::TopLevelBvhLo); Value *BvhAddrHi = m_builder->CreateExtractValue(rayQuery, RayQueryParams::TopLevelBvhHi); Value *BvhAddr = PoisonValue::get(FixedVectorType::get(Type::getInt32Ty(*m_context), 2)); BvhAddr = m_builder->CreateInsertElement(BvhAddr, BvhAddrLo, uint64_t(0)); BvhAddr = m_builder->CreateInsertElement(BvhAddr, BvhAddrHi, 1); - // Mask out the node offset - auto nodeOffsetMask = m_builder->getInt32(0xFFFFFFF8u); - // Shift left by 3 to make it 64B aligned address - auto nodeOffsetShift = m_builder->getInt32(3u); - - auto nodeOffset = m_builder->CreateAnd(instNodePtr, nodeOffsetMask); - nodeOffset = m_builder->CreateShl(nodeOffset, nodeOffsetShift); - - Value *instNodeOffset = PoisonValue::get(int32x2Ty); - instNodeOffset = m_builder->CreateInsertElement(instNodeOffset, nodeOffset, uint64_t(0)); - instNodeOffset = m_builder->CreateInsertElement(instNodeOffset, zero, 1); - auto instNodeAddr = m_builder->CreateAdd(BvhAddr, instNodeOffset); - return instNodeAddr; -} - -// ===================================================================================================================== -// Create instructions to load instance ID given the 64-bit instance node address at the current insert point -// -// @param instNodeAddr 
: 64-bit instance node address, in <2 x i32> -Value *SpirvLowerRayQuery::createLoadInstanceId(Value *instNodeAddr) { - Value *zero = m_builder->getInt32(0); - Type *gpuAddrAsPtrTy = PointerType::get(*m_context, SPIRAS_Global); - auto int32x2Ty = FixedVectorType::get(m_builder->getInt32Ty(), 2); + StringRef getInstanceNodeAddr = + m_context->getPipelineContext()->getRayTracingFunctionName(Vkgc::RT_ENTRY_GET_INSTANCE_NODE); - const unsigned instanceIdOffset = offsetof(RayTracingInstanceNode, desc.InstanceID_and_Mask); + auto bvhAddr = m_builder->CreateBitCast(BvhAddr, m_builder->getInt64Ty()); + Value *bvhPtr = m_builder->CreateAllocaAtFuncEntry(m_builder->getInt64Ty()); + Value *nodePtr = m_builder->CreateAllocaAtFuncEntry(m_builder->getInt32Ty()); + m_builder->CreateStore(bvhAddr, bvhPtr); + m_builder->CreateStore(instNodePtr, nodePtr); - Value *instanceIdOffsetVar = PoisonValue::get(int32x2Ty); - instanceIdOffsetVar = - m_builder->CreateInsertElement(instanceIdOffsetVar, m_builder->getInt32(instanceIdOffset), uint64_t(0)); - instanceIdOffsetVar = m_builder->CreateInsertElement(instanceIdOffsetVar, zero, 1); - Value *instanceIdAddr = m_builder->CreateAdd(instNodeAddr, instanceIdOffsetVar); - - instanceIdAddr = m_builder->CreateBitCast(instanceIdAddr, m_builder->getInt64Ty()); - auto instanceIdAddrAsPtr = m_builder->CreateIntToPtr(instanceIdAddr, gpuAddrAsPtrTy); - auto loadValue = m_builder->CreateConstGEP1_32(m_builder->getInt8Ty(), instanceIdAddrAsPtr, 0); - loadValue = m_builder->CreateBitCast(loadValue, PointerType::get(*m_context, SPIRAS_Global)); - - loadValue = m_builder->CreateLoad(m_builder->getInt32Ty(), loadValue); - // Mask out the instance ID in lower 24 bits - loadValue = m_builder->CreateAnd(loadValue, 0x00FFFFFFu); - - return loadValue; + auto cmiResult = + m_crossModuleInliner.value().inlineCall(*m_builder, getGpurtFunction(getInstanceNodeAddr), {bvhPtr, nodePtr}); + return cmiResult.returnValue; } // 
===================================================================================================================== -// Create instructions to load a 3x4 matrix from given address at the current insert point +// Call GpuRt Library Func to load a 3x4 matrix from given address at the current insert point // -// @param matrixAddr : Matrix address, which type is <2 x i32> -Value *SpirvLowerRayQuery::createLoadMatrixFromAddr(Value *matrixAddr) { - Value *zero = m_builder->getInt32(0); - Type *gpuAddrAsPtrTy = PointerType::get(*m_context, SPIRAS_Global); - - // Bitcast matrixAddr to i64 integer - matrixAddr = m_builder->CreateBitCast(matrixAddr, m_builder->getInt64Ty()); - auto matrixAddrAsPtr = m_builder->CreateIntToPtr(matrixAddr, gpuAddrAsPtrTy); - +// @param instanceNodeAddr : instanceNode address, which type is i64 +Value *SpirvLowerRayQuery::createLoadMatrixFromFunc(Value *instanceNodeAddr, unsigned builtInId) { auto floatx3Ty = FixedVectorType::get(m_builder->getFloatTy(), 3); - auto floatx4Ty = FixedVectorType::get(m_builder->getFloatTy(), 4); auto matrixTy = ArrayType::get(floatx3Ty, 4); - auto loadPtrTy = floatx4Ty->getPointerTo(SPIRAS_Global); + Value *instandeNodeAddrPtr = m_builder->CreateAllocaAtFuncEntry(m_builder->getInt64Ty()); + m_builder->CreateStore(instanceNodeAddr, instandeNodeAddrPtr); + + StringRef getMatrixFunc; + if (builtInId == BuiltInObjectToWorldKHR) { + getMatrixFunc = + m_context->getPipelineContext()->getRayTracingFunctionName(Vkgc::RT_ENTRY_OBJECT_TO_WORLD_TRANSFORM); + } else { + getMatrixFunc = + m_context->getPipelineContext()->getRayTracingFunctionName(Vkgc::RT_ENTRY_WORLD_TO_OBJECT_TRANSFORM); + } - // Construct [4 x <3 x float>] Value *matrixRow[4] = { PoisonValue::get(floatx3Ty), PoisonValue::get(floatx3Ty), @@ -1382,26 +1311,27 @@ Value *SpirvLowerRayQuery::createLoadMatrixFromAddr(Value *matrixAddr) { PoisonValue::get(floatx3Ty), }; - // Matrix in the memory is [3 x <4 x float>], need to transform to [4 x <3 x float>] - Value 
*loadOffset = zero; - Value *stride = m_builder->getInt32(sizeof(float) * 4); - // For Three columns for (unsigned i = 0; i < 3; ++i) { - Value *loadValue = m_builder->CreateGEP(m_builder->getInt8Ty(), matrixAddrAsPtr, loadOffset); - loadValue = m_builder->CreateBitCast(loadValue, loadPtrTy); - auto rowValue = m_builder->CreateLoad(floatx4Ty, loadValue); + Value *row = m_builder->getInt32(i); for (unsigned j = 0; j < 4; ++j) { - auto element = m_builder->CreateExtractElement(rowValue, uint64_t(j)); - matrixRow[j] = m_builder->CreateInsertElement(matrixRow[j], element, uint64_t(i)); + Value *col = m_builder->getInt32(j); + + Value *colPtr = m_builder->CreateAllocaAtFuncEntry(m_builder->getInt32Ty()); + Value *rowPtr = m_builder->CreateAllocaAtFuncEntry(m_builder->getInt32Ty()); + m_builder->CreateStore(col, colPtr); + m_builder->CreateStore(row, rowPtr); + + auto cmiMatrixResult = m_crossModuleInliner.value().inlineCall(*m_builder, getGpurtFunction(getMatrixFunc), + {instandeNodeAddrPtr, rowPtr, colPtr}); + matrixRow[j] = m_builder->CreateInsertElement(matrixRow[j], cmiMatrixResult.returnValue, uint64_t(i)); } - loadOffset = m_builder->CreateAdd(loadOffset, stride); } + Value *matrix = PoisonValue::get(matrixTy); matrix = m_builder->CreateInsertValue(matrix, matrixRow[0], 0); matrix = m_builder->CreateInsertValue(matrix, matrixRow[1], 1); matrix = m_builder->CreateInsertValue(matrix, matrixRow[2], 2); matrix = m_builder->CreateInsertValue(matrix, matrixRow[3], 3); - return matrix; } @@ -1414,4 +1344,13 @@ Value *SpirvLowerRayQuery::getThreadIdInGroup() const { return m_builder->CreateReadBuiltInInput(static_cast(builtIn)); } +// ===================================================================================================================== +// Looks up an exported function in the GPURT module +Function *SpirvLowerRayQuery::getGpurtFunction(StringRef name) { + auto &gpurtContext = lgc::GpurtContext::get(*m_context); + Function *fn = 
gpurtContext.theModule->getFunction(name); + assert(fn); + return fn; +} + } // namespace Llpc diff --git a/llpc/lower/llpcSpirvLowerRayQuery.h b/llpc/lower/llpcSpirvLowerRayQuery.h index 3becb9f588..6c4df2e191 100644 --- a/llpc/lower/llpcSpirvLowerRayQuery.h +++ b/llpc/lower/llpcSpirvLowerRayQuery.h @@ -31,6 +31,7 @@ #pragma once #include "SPIRVInternal.h" +#include "compilerutils/CompilerUtils.h" #include "llpcSpirvLower.h" #include "llvm/IR/PassManager.h" @@ -95,6 +96,10 @@ struct RayTracingInstanceNode { }; #pragma pack(pop) +namespace CompilerUtils { +class CrossModuleInliner; +} // namespace CompilerUtils + namespace Llpc { // Corresponds to gl_RayFlags* in GLSL_EXT_ray_tracing.txt @@ -130,16 +135,17 @@ class SpirvLowerRayQuery : public SpirvLower, public llvm::PassInfoMixin m_crossModuleInliner; + private: template void createRayQueryFunc(llvm::Function *func); void createRayQueryProceedFunc(llvm::Function *func); diff --git a/llpc/lower/llpcSpirvLowerRayTracing.cpp b/llpc/lower/llpcSpirvLowerRayTracing.cpp index 17f5b57db2..b95541eea6 100644 --- a/llpc/lower/llpcSpirvLowerRayTracing.cpp +++ b/llpc/lower/llpcSpirvLowerRayTracing.cpp @@ -31,12 +31,13 @@ #include "llpcSpirvLowerRayTracing.h" #include "SPIRVInternal.h" -#include "continuations/ContinuationsUtil.h" -#include "continuations/GpurtContext.h" +#include "compilerutils/CompilerUtils.h" #include "gpurt-compiler.h" #include "llpcContext.h" #include "llpcRayTracingContext.h" #include "llpcSpirvLowerUtil.h" +#include "llvmraytracing/ContinuationsUtil.h" +#include "llvmraytracing/GpurtContext.h" #include "lgc/Builder.h" #include "lgc/CommonDefs.h" #include "lgc/GpurtDialect.h" @@ -45,6 +46,7 @@ #include "lgc/LgcRtDialect.h" #include "lgc/Pipeline.h" #include "llvm-dialects/Dialect/Visitor.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/ProfileSummaryInfo.h" @@ -66,6 +68,7 @@ extern opt TrimDebugInfo; using 
namespace llvm; using namespace Llpc; using namespace lgc::rt; +using namespace CompilerUtils; namespace SPIRV { extern const char *MetaNameSpirvOp; @@ -76,7 +79,6 @@ const char *TraceRayKHR = "_cs_"; const char *TraceRaySetTraceParams = "TraceRaySetTraceParams"; const char *ShaderTable = "ShaderTable"; static const char *CallAnyHitShader = "AmdTraceRayCallAnyHitShader"; -static const char *FetchTrianglePositionFromNodePointer = "FetchTrianglePositionFromNodePointer"; static const char *RemapCapturedVaToReplayVa = "AmdTraceRayRemapCapturedVaToReplayVa"; static const char *ContinufyStageMeta = "continufy.stage"; } // namespace RtName @@ -419,6 +421,9 @@ PreservedAnalyses SpirvLowerRayTracing::run(Module &module, ModuleAnalysisManage mode.workgroupSizeZ = rtState->threadGroupSizeZ; lgc::Pipeline::setComputeShaderMode(module, mode); + m_crossModuleInliner = std::make_optional(); + auto onExit = make_scope_exit([&] { m_crossModuleInliner.reset(); }); + // Create empty raygen main module if (module.empty()) { m_shaderStage = ShaderStageRayTracingRayGen; @@ -1509,6 +1514,18 @@ void SpirvLowerRayTracing::inlineTraceRay(llvm::CallInst *callInst, ModuleAnalys auto &psi = analysisManager.getResult(*m_module); auto calleeFunc = callInst->getCalledFunction(); auto callingFunc = callInst->getCaller(); +#if !defined(LLVM_MAIN_REVISION) || LLVM_MAIN_REVISION >= 489715 + // Check if conversion to NewDbgFormat is needed for calleeFunc. + // If we are inside PassManger then m_module and all its Functions and BB may be converted (depending if feature is + // turned on) to new Debug Info format. Since calleeFunc is a new function which will be added/inlined into m_module, + // we have to convert these function to new Debug Info format. + // + // Since calleeFunc will be removed after inline then there is no need to convert it back to old DbgInfoFormat after + // InlineFunction. 
+ bool shouldConvert = m_module->IsNewDbgInfoFormat && !calleeFunc->IsNewDbgInfoFormat; + if (shouldConvert) + calleeFunc->convertToNewDbgValues(); +#endif InlineFunctionInfo IFI(getAssumptionCache, &psi, &getBFI(*callingFunc), &getBFI(*calleeFunc)); InlineResult res = InlineFunction(*callInst, IFI, /*MergeAttributes=*/true, &getAAR(*calleeFunc), true); (void(res)); // unused @@ -2092,26 +2109,10 @@ Value *SpirvLowerRayTracing::createLoadRayTracingMatrix(unsigned builtInId) { m_builder->SetInsertPoint(m_insertPosPastInit); - auto int32x2Ty = FixedVectorType::get(m_builder->getInt32Ty(), 2); - Value *zero = m_builder->getInt32(0); - // Get matrix address from instance node address Value *instNodeAddr = createLoadInstNodeAddr(); - Value *matrixAddr = instNodeAddr; - - unsigned transformOffset = offsetof(RayTracingInstanceNode, desc.Transform); - if (builtInId == BuiltInObjectToWorldKHR) { - transformOffset = offsetof(RayTracingInstanceNode, extra.Transform); - } - - Value *matrixOffset = PoisonValue::get(int32x2Ty); - matrixOffset = m_builder->CreateInsertElement(matrixOffset, m_builder->getInt32(transformOffset), uint64_t(0)); - matrixOffset = m_builder->CreateInsertElement(matrixOffset, zero, 1); - - matrixAddr = m_builder->CreateAdd(matrixAddr, matrixOffset); - - return createLoadMatrixFromAddr(matrixAddr); + return createLoadMatrixFromFunc(instNodeAddr, builtInId); } // ===================================================================================================================== @@ -2122,6 +2123,9 @@ void SpirvLowerRayTracing::createSetHitTriangleNodePointer(Function *func) { eraseFunctionBlocks(func); BasicBlock *entryBlock = BasicBlock::Create(*m_context, "", func); m_builder->SetInsertPoint(entryBlock); + // Cross module inliner cannot be used to inline a function with multiple blocks into in a degenerate block, create + // the terminator first. 
+ m_builder->SetInsertPoint(m_builder->CreateRetVoid()); if (m_builtInParams.find(TraceParam::HitTriangleVertexPositions) != m_builtInParams.end()) { Value *bvh = func->arg_begin(); Value *nodePtr = func->arg_begin() + 1; @@ -2133,13 +2137,14 @@ void SpirvLowerRayTracing::createSetHitTriangleNodePointer(Function *func) { m_builder->CreateStore(bvh, bvhPtr); m_builder->CreateStore(nodePtr, nodePtrPtr); - auto triangleDataTy = m_traceParamsTys[TraceParam::HitTriangleVertexPositions]; - auto triangleData = - m_builder->CreateNamedCall(RtName::FetchTrianglePositionFromNodePointer, triangleDataTy, {bvhPtr, nodePtrPtr}, - {Attribute::NoUnwind, Attribute::AlwaysInline}); + auto triangleData = m_crossModuleInliner.value() + .inlineCall(*m_builder, + getGpurtFunction(m_context->getPipelineContext()->getRayTracingFunctionName( + Vkgc::RT_ENTRY_FETCH_HIT_TRIANGLE_FROM_NODE_POINTER)), + {bvhPtr, nodePtrPtr}) + .returnValue; m_builder->CreateStore(triangleData, vertexPos); } - m_builder->CreateRetVoid(); } // ===================================================================================================================== @@ -2690,7 +2695,7 @@ void SpirvLowerRayTracing::visitInstanceIndexOp(lgc::rt::InstanceIndexOp &inst) m_builder->SetInsertPoint(&inst); auto instNodeAddr = createLoadInstNodeAddr(); - auto instanceIndex = createLoadInstanceId(instNodeAddr); + auto instanceIndex = createLoadInstanceIndexOrId(instNodeAddr, true); inst.replaceAllUsesWith(instanceIndex); m_callsToLower.push_back(&inst); @@ -2798,7 +2803,7 @@ void SpirvLowerRayTracing::visitInstanceIdOp(lgc::rt::InstanceIdOp &inst) { m_builder->SetInsertPoint(&inst); auto instNodeAddr = createLoadInstNodeAddr(); - auto instanceId = createLoadInstanceIndex(instNodeAddr); + auto instanceId = createLoadInstanceIndexOrId(instNodeAddr, false); inst.replaceAllUsesWith(instanceId); m_callsToLower.push_back(&inst); @@ -2997,7 +3002,8 @@ void SpirvLowerRayTracing::createSqttCallCompactToken(ShaderStage stage) { // 
===================================================================================================================== // Creates instructions to emit SQTT shader data function return token void SpirvLowerRayTracing::createSqttFunctionReturnToken() { - m_builder->CreateIntrinsic(Intrinsic::amdgcn_s_ttracedata, {}, m_builder->getInt32(SqttWellKnownTypeFunctionReturn)); + m_builder->CreateIntrinsic(Intrinsic::amdgcn_s_ttracedata_imm, {}, + m_builder->getInt16(SqttWellKnownTypeFunctionReturn)); } // ===================================================================================================================== diff --git a/llpc/lower/llpcSpirvProcessGpuRtLibrary.cpp b/llpc/lower/llpcSpirvProcessGpuRtLibrary.cpp index b645153cb8..37a5f8f1fb 100644 --- a/llpc/lower/llpcSpirvProcessGpuRtLibrary.cpp +++ b/llpc/lower/llpcSpirvProcessGpuRtLibrary.cpp @@ -30,15 +30,19 @@ */ #include "llpcSpirvProcessGpuRtLibrary.h" #include "SPIRVInternal.h" -#include "continuations/ContinuationsUtil.h" +#include "compilerutils/CompilerUtils.h" #include "llpcContext.h" +#include "llpcRayTracingContext.h" #include "llpcSpirvLowerInternalLibraryIntrinsicUtil.h" #include "llpcSpirvLowerUtil.h" +#include "llvmraytracing/Continuations.h" +#include "llvmraytracing/ContinuationsUtil.h" #include "lgc/Builder.h" #include "lgc/GpurtDialect.h" #include "lgc/LgcContext.h" #include "lgc/LgcCpsDialect.h" #include "lgc/LgcRtDialect.h" +#include "llvm/ADT/SmallBitVector.h" #define DEBUG_TYPE "llpc-spirv-lower-gpurt-library" using namespace lgc; @@ -121,6 +125,8 @@ SpirvProcessGpuRtLibrary::LibraryFunctionTable::LibraryFunctionTable() { m_libFuncPtrs["_AmdContStackFree"] = &SpirvProcessGpuRtLibrary::createContStackFree; m_libFuncPtrs["_AmdContStackGetPtr"] = &SpirvProcessGpuRtLibrary::createContStackGetPtr; m_libFuncPtrs["_AmdContStackSetPtr"] = &SpirvProcessGpuRtLibrary::createContStackSetPtr; + m_libFuncPtrs["_AmdContinuationStackIsGlobal"] = &SpirvProcessGpuRtLibrary::createContinuationStackIsGlobal; 
+ m_libFuncPtrs["_AmdGetRtip"] = &SpirvProcessGpuRtLibrary::createGetRtip; } // ===================================================================================================================== @@ -142,17 +148,72 @@ void SpirvProcessGpuRtLibrary::processLibraryFunction(Function *&func) { const StringRef fetchTrianglePositionFromRayQueryFuncName = m_context->getPipelineContext()->getRayTracingFunctionName(Vkgc::RT_ENTRY_FETCH_HIT_TRIANGLE_FROM_RAY_QUERY); + const StringRef getInstanceIndex = + m_context->getPipelineContext()->getRayTracingFunctionName(Vkgc::RT_ENTRY_INSTANCE_INDEX); + + const StringRef getInstanceId = + m_context->getPipelineContext()->getRayTracingFunctionName(Vkgc::RT_ENTRY_INSTANCE_ID); + + const StringRef getInstanceNodeAddr = + m_context->getPipelineContext()->getRayTracingFunctionName(Vkgc::RT_ENTRY_GET_INSTANCE_NODE); + + const StringRef getObjToWorldTrans = + m_context->getPipelineContext()->getRayTracingFunctionName(Vkgc::RT_ENTRY_OBJECT_TO_WORLD_TRANSFORM); + + const StringRef getWorldToObjTrans = + m_context->getPipelineContext()->getRayTracingFunctionName(Vkgc::RT_ENTRY_WORLD_TO_OBJECT_TRANSFORM); + assert(!traceRayFuncName.empty()); assert(!rayQueryInitializeFuncName.empty()); assert(!rayQueryProceedFuncName.empty()); assert(!fetchTrianglePositionFromNodePointerFuncName.empty()); assert(!fetchTrianglePositionFromRayQueryFuncName.empty()); + assert(!getInstanceIndex.empty()); + assert(!getInstanceId.empty()); + assert(!getInstanceNodeAddr.empty()); + assert(!getObjToWorldTrans.empty()); + assert(!getWorldToObjTrans.empty()); + + bool isAmdAwaitLike = funcName.starts_with("_AmdAwait") || funcName.starts_with("_AmdWaitAwait"); + if (funcName.starts_with("_cont_") || isAmdAwaitLike) { + func->setLinkage(GlobalValue::WeakAnyLinkage); + // Delete function body of _Amd*Await, it will be handled in LowerRaytracingPipeline. 
+ if (isAmdAwaitLike) + func->deleteBody(); + + // The function might not have types metadata like _cont_SetupRayGen or _AmdAwait which is a declaration, nothing + // needs to be done. + if (!func->getMetadata(ContHelper::MDTypesName)) + return; + + SmallBitVector promotionMask(func->arg_size()); + for (unsigned argNo = 0; argNo < func->arg_size(); argNo++) { + auto *arg = func->getArg(argNo); + ContArgTy argTy = ContArgTy::get(func, arg); + auto funcName = func->getName(); + + if (!argTy.isPointerTy()) + continue; + + // Change the pointer type to its value type for non-struct types. + // Amd*Await, use value types for all arguments. + // For _cont_SetTriangleHitAttributes, we always use its value type for hitAttributes argument. + if (!isa(argTy.getPointerElementType()) || isAmdAwaitLike || + (funcName == ContDriverFunc::SetTriangleHitAttributesName && argNo == 1)) + promotionMask.set(argNo); + } + + promotePointerArguments(func, promotionMask); + return; + } // Set external linkage for library entry functions if (funcName.starts_with(traceRayFuncName) || funcName.starts_with(rayQueryInitializeFuncName) || funcName.starts_with(rayQueryProceedFuncName) || funcName.starts_with(fetchTrianglePositionFromNodePointerFuncName) || - funcName.starts_with(fetchTrianglePositionFromRayQueryFuncName) || funcName.starts_with("_cont_")) { + funcName.starts_with(fetchTrianglePositionFromRayQueryFuncName) || funcName.starts_with(getInstanceIndex) || + funcName.starts_with(getInstanceId) || funcName.starts_with(getInstanceNodeAddr) || + funcName.starts_with(getObjToWorldTrans) || funcName.starts_with(getWorldToObjTrans)) { func->setLinkage(GlobalValue::WeakAnyLinkage); return; } @@ -179,6 +240,17 @@ void SpirvProcessGpuRtLibrary::processLibraryFunction(Function *&func) { m_builder->SetInsertPoint(clearBlock(func)); createEnqueue(func); return; + } else if (funcName.starts_with("_AmdGetUninitialized")) { + m_builder->SetInsertPoint(clearBlock(func)); + 
m_builder->CreateRet(PoisonValue::get(func->getReturnType())); + return; + } else if (funcName.starts_with("_AmdGetShaderKind") || funcName.starts_with("_AmdGetCurrentFuncAddr") || + funcName.starts_with("_AmdGetResumePointAddr")) { + // These _Amd* functions are handled in later continuation transformations, delete the function body to preserve the + // call. + func->deleteBody(); + func->setLinkage(GlobalValue::WeakAnyLinkage); + return; } // Create implementation for intrinsic functions. @@ -742,15 +814,8 @@ void SpirvProcessGpuRtLibrary::createDispatchThreadIdFlat(llvm::Function *func) // // @param func : The function to create void SpirvProcessGpuRtLibrary::createContStackAlloc(llvm::Function *func) { - Value *byteSize = nullptr; - if (func->arg_size() == 2) { - // TODO: Remove this when refactoring is done. - // Ignore the first argument. - byteSize = m_builder->CreateLoad(m_builder->getInt32Ty(), func->getArg(1)); - } else { - assert(func->arg_size() == 1); - byteSize = m_builder->CreateLoad(m_builder->getInt32Ty(), func->getArg(0)); - } + assert(func->arg_size() == 1); + Value *byteSize = m_builder->CreateLoad(m_builder->getInt32Ty(), func->getArg(0)); auto stackPtr = m_builder->create(byteSize); m_builder->CreateRet(m_builder->CreatePtrToInt(stackPtr, m_builder->getInt32Ty())); } @@ -824,14 +889,14 @@ void SpirvProcessGpuRtLibrary::createEnqueue(Function *func) { bool hasRetAddrArg = !funcName.contains("RayGen") && !funcName.contains("Traversal"); bool hasWaitMaskArg = funcName.contains("Wait"); if (hasRetAddrArg) { - // Skip csp and waitMask - unsigned retAddrArgIdx = hasWaitMaskArg ? 3 : 2; + // Skip waitMask + unsigned retAddrArgIdx = hasWaitMaskArg ? 2 : 1; tailArgs.push_back(m_builder->CreateLoad(m_builder->getInt32Ty(), func->getArg(retAddrArgIdx))); } else { tailArgs.push_back(PoisonValue::get(m_builder->getInt32Ty())); } // Get shader-index from system-data. - unsigned systemDataArgIdx = 2 + (hasRetAddrArg ? 1 : 0) + (hasWaitMaskArg ? 
1 : 0); + unsigned systemDataArgIdx = 1 + (hasRetAddrArg ? 1 : 0) + (hasWaitMaskArg ? 1 : 0); tailArgs.push_back(m_builder->CreateNamedCall("_cont_GetLocalRootIndex", m_builder->getInt32Ty(), {func->getArg(systemDataArgIdx)}, {})); // Process system-data and arguments after. @@ -846,4 +911,21 @@ void SpirvProcessGpuRtLibrary::createEnqueue(Function *func) { m_builder->CreateUnreachable(); } +// Fill in function to check whether continuation stack is global +// +// @param func : The function to create +void SpirvProcessGpuRtLibrary::createContinuationStackIsGlobal(llvm::Function *func) { + m_builder->CreateRet(m_builder->create()); +} + +// ===================================================================================================================== +// Fill in function to get RTIP +// +// @param func : The function to create +void SpirvProcessGpuRtLibrary::createGetRtip(llvm::Function *func) { + auto rtip = m_context->getPipelineContext()->getRayTracingState()->rtIpVersion; + // The version is encoded as in decimal digits, so 11 is rtip 1.1, 20 is rtip 2.0 + m_builder->CreateRet(m_builder->getInt32(rtip.major * 10 + rtip.minor)); +} + } // namespace Llpc diff --git a/llpc/lower/llpcSpirvProcessGpuRtLibrary.h b/llpc/lower/llpcSpirvProcessGpuRtLibrary.h index 0760df729a..07a32ab10e 100644 --- a/llpc/lower/llpcSpirvProcessGpuRtLibrary.h +++ b/llpc/lower/llpcSpirvProcessGpuRtLibrary.h @@ -98,6 +98,8 @@ class SpirvProcessGpuRtLibrary : public SpirvLower, public llvm::PassInfoMixin}}, ptr addrspace(7) [[TMP0]], i32 {{12|0, i32 2, i32 1}} // SHADERTEST-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(7) [[TMP5]], align 4 // SHADERTEST-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP1]], [[TMP6]] -// SHADERTEST-NEXT: [[TMP8:%.*]] = and i1 [[TMP4]], [[TMP7]] +// SHADERTEST-NEXT: [[TMP8:%.*]] = and i1 {{%4|%7}}, {{%7|%4}} // SHADERTEST-NEXT: [[TMP9:%.*]] = select reassoc nnan nsz arcp contract afn i1 [[TMP8]], <4 x float> zeroinitializer, <4 x float> // SHADERTEST-NEXT: call void 
(...) @lgc.create.write.generic.output(<4 x float> [[TMP9]], i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison) // SHADERTEST-NEXT: ret void diff --git a/llpc/test/shaderdb/core/TestReverseThreadGroup.comp b/llpc/test/shaderdb/core/TestReverseThreadGroup.comp index 753ee7d3e9..4882089b5f 100644 --- a/llpc/test/shaderdb/core/TestReverseThreadGroup.comp +++ b/llpc/test/shaderdb/core/TestReverseThreadGroup.comp @@ -20,9 +20,9 @@ void main() // - get the gl_NumWorkGroups // - get the internal descriptor table // Note that `@lgc.load.user.data` is generated after lowering of `@lgc.load.buffer.desc` (lgc-lower-desc pass). -// REVERSETEST-DAG: %{{[0-9]+}} = call i32 @lgc.load.user.data.i32(i32 0) +// REVERSETEST-DAG: %{{[0-9]+}} = call i32 @lgc.load.user.data__i32(i32 0) // REVERSETEST-DAG: %{{[0-9]+}} = call ptr addrspace(4) @lgc.special.user.data.Workgroup(i32 268435462) -// REVERSETEST-DAG: %{{[0-9]+}} = call i32 @lgc.load.user.data.i32(i32 4) +// REVERSETEST-DAG: %{{[0-9]+}} = call i32 @lgc.load.user.data__i32(i32 4) // There should be a select between the reversed thread group ID and original thread group ID // REVERSETEST: %{{[0-9]+}} = select i1 %{{[0-9]+}}, <3 x i32> %{{[0-9]+}}, <3 x i32> %{{[0-9]+}} // REVERSETEST: AMDLLPC SUCCESS diff --git a/llpc/test/shaderdb/extensions/Ext16bitStorage_TestFsInput_lit.frag b/llpc/test/shaderdb/extensions/Ext16bitStorage_TestFsInput_lit.frag index 1b84263664..f3e4c205f7 100644 --- a/llpc/test/shaderdb/extensions/Ext16bitStorage_TestFsInput_lit.frag +++ b/llpc/test/shaderdb/extensions/Ext16bitStorage_TestFsInput_lit.frag @@ -26,10 +26,10 @@ void main (void) ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results -; SHADERTEST-DAG: call <3 x i16> (...) @lgc.input.import.interpolated.v3i16{{.*}} -; SHADERTEST-DAG: call <3 x half> (...) 
@lgc.input.import.interpolated.v3f16{{.*}} -; SHADERTEST-DAG: call i16 (...) @lgc.input.import.interpolated.i16{{.*}} -; SHADERTEST-DAG: call half (...) @lgc.input.import.interpolated.f16{{.*}} +; SHADERTEST-DAG: call <3 x i16> (...) @lgc.input.import.interpolated__v3i16{{.*}} +; SHADERTEST-DAG: call <3 x half> (...) @lgc.input.import.interpolated__v3f16{{.*}} +; SHADERTEST-DAG: call i16 (...) @lgc.input.import.interpolated__i16{{.*}} +; SHADERTEST-DAG: call half (...) @lgc.input.import.interpolated__f16{{.*}} ; SHADERTEST: AMDLLPC SUCCESS */ // END_SHADERTEST diff --git a/llpc/test/shaderdb/extensions/Ext16bitStorage_TestGsInput_lit.geom b/llpc/test/shaderdb/extensions/Ext16bitStorage_TestGsInput_lit.geom index 0ffbc8a68c..7f42feaa08 100644 --- a/llpc/test/shaderdb/extensions/Ext16bitStorage_TestGsInput_lit.geom +++ b/llpc/test/shaderdb/extensions/Ext16bitStorage_TestGsInput_lit.geom @@ -36,10 +36,10 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results -; SHADERTEST: call <3 x i16> @lgc.input.import.generic.v3i16{{.*}} -; SHADERTEST: call i16 @lgc.input.import.generic.i16{{.*}} -; SHADERTEST: call <3 x half> @lgc.input.import.generic.v3f16{{.*}} -; SHADERTEST: call half @lgc.input.import.generic.f16{{.*}} +; SHADERTEST: call <3 x i16> @lgc.input.import.generic__v3i16{{.*}} +; SHADERTEST: call i16 @lgc.input.import.generic__i16{{.*}} +; SHADERTEST: call <3 x half> @lgc.input.import.generic__v3f16{{.*}} +; SHADERTEST: call half @lgc.input.import.generic__f16{{.*}} ; SHADERTEST: AMDLLPC SUCCESS */ // END_SHADERTEST diff --git a/llpc/test/shaderdb/extensions/Ext16bitStorage_TestTcsInput_lit.tesc b/llpc/test/shaderdb/extensions/Ext16bitStorage_TestTcsInput_lit.tesc index 47b773a868..bd6c07014d 100644 --- a/llpc/test/shaderdb/extensions/Ext16bitStorage_TestTcsInput_lit.tesc +++ 
b/llpc/test/shaderdb/extensions/Ext16bitStorage_TestTcsInput_lit.tesc @@ -31,10 +31,10 @@ void main(void) ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results ; SHADERTEST: call i32 @lgc.input.import.builtin.InvocationId{{.*}} -; SHADERTEST: call half @lgc.input.import.generic.f16{{.*}} -; SHADERTEST: call <3 x half> @lgc.input.import.generic.v3f16{{.*}} -; SHADERTEST: call i16 @lgc.input.import.generic.i16{{.*}} -; SHADERTEST: call <3 x i16> @lgc.input.import.generic.v3i16{{.*}} +; SHADERTEST: call half @lgc.input.import.generic__f16{{.*}} +; SHADERTEST: call <3 x half> @lgc.input.import.generic__v3f16{{.*}} +; SHADERTEST: call i16 @lgc.input.import.generic__i16{{.*}} +; SHADERTEST: call <3 x i16> @lgc.input.import.generic__v3i16{{.*}} ; SHADERTEST: AMDLLPC SUCCESS */ // END_SHADERTEST diff --git a/llpc/test/shaderdb/extensions/Ext16bitStorage_TestTesInput_lit.tese b/llpc/test/shaderdb/extensions/Ext16bitStorage_TestTesInput_lit.tese index 3bad082c22..6f7d93869d 100644 --- a/llpc/test/shaderdb/extensions/Ext16bitStorage_TestTesInput_lit.tese +++ b/llpc/test/shaderdb/extensions/Ext16bitStorage_TestTesInput_lit.tese @@ -28,10 +28,10 @@ void main(void) ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results -; SHADERTEST-DAG: call half @lgc.input.import.generic.f16{{.*}} -; SHADERTEST-DAG: call <3 x half> @lgc.input.import.generic.v3f16{{.*}} -; SHADERTEST-DAG: call i16 @lgc.input.import.generic.i16{{.*}} -; SHADERTEST-DAG: call <3 x i16> @lgc.input.import.generic.v3i16{{.*}} +; SHADERTEST-DAG: call half @lgc.input.import.generic__f16{{.*}} +; SHADERTEST-DAG: call <3 x half> @lgc.input.import.generic__v3f16{{.*}} +; SHADERTEST-DAG: call i16 @lgc.input.import.generic__i16{{.*}} +; SHADERTEST-DAG: call <3 x i16> @lgc.input.import.generic__v3i16{{.*}} ; SHADERTEST: 
AMDLLPC SUCCESS */ // END_SHADERTEST diff --git a/llpc/test/shaderdb/extensions/Ext16bitStorage_TestVsInput_lit.vert b/llpc/test/shaderdb/extensions/Ext16bitStorage_TestVsInput_lit.vert index ba93c69303..8fe19a4896 100644 --- a/llpc/test/shaderdb/extensions/Ext16bitStorage_TestVsInput_lit.vert +++ b/llpc/test/shaderdb/extensions/Ext16bitStorage_TestVsInput_lit.vert @@ -24,10 +24,10 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results -; SHADERTEST-DAG: call i16 @lgc.input.import.generic.i16{{.*}} -; SHADERTEST-DAG: call <3 x i16> @lgc.input.import.generic.v3i16{{.*}} -; SHADERTEST-DAG: call half @lgc.input.import.generic.f16{{.*}} -; SHADERTEST-DAG: call <3 x half> @lgc.input.import.generic.v3f16{{.*}} +; SHADERTEST-DAG: call i16 @lgc.input.import.generic__i16{{.*}} +; SHADERTEST-DAG: call <3 x i16> @lgc.input.import.generic__v3i16{{.*}} +; SHADERTEST-DAG: call half @lgc.input.import.generic__f16{{.*}} +; SHADERTEST-DAG: call <3 x half> @lgc.input.import.generic__v3f16{{.*}} ; SHADERTEST: AMDLLPC SUCCESS */ // END_SHADERTEST diff --git a/llpc/test/shaderdb/extensions/ExtExplicitVertexParam_TestInterpFunc_lit.frag b/llpc/test/shaderdb/extensions/ExtExplicitVertexParam_TestInterpFunc_lit.frag index 658f123d0b..891ee11c98 100644 --- a/llpc/test/shaderdb/extensions/ExtExplicitVertexParam_TestInterpFunc_lit.frag +++ b/llpc/test/shaderdb/extensions/ExtExplicitVertexParam_TestInterpFunc_lit.frag @@ -21,8 +21,8 @@ void main() ; SHADERTEST: call {{.*}} <2 x float> @InterpolateAtVertexAMD.v2f32.p64.i32 ; SHADERTEST: call {{.*}} <2 x i32> @InterpolateAtVertexAMD.v2i32.p64.i32 ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results -; SHADERTEST: call <2 x float> (...) @lgc.input.import.interpolated.v2f32{{.*}} -; SHADERTEST: call <2 x i32> (...) 
@lgc.input.import.interpolated.v2i32{{.*}} +; SHADERTEST: call <2 x float> (...) @lgc.input.import.interpolated__v2f32{{.*}} +; SHADERTEST: call <2 x i32> (...) @lgc.input.import.interpolated__v2i32{{.*}} ; SHADERTEST: AMDLLPC SUCCESS */ // END_SHADERTEST diff --git a/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestInterpFuncs_lit.frag b/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestInterpFuncs_lit.frag index 99121c1f7e..0faaeccd45 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestInterpFuncs_lit.frag +++ b/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestInterpFuncs_lit.frag @@ -20,7 +20,7 @@ void main() ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} pipeline before-patching results ; SHADERTEST: %{{[A-Za-z0-9]*}} = call <2 x float> @lgc.input.import.builtin.InterpPerspCentroid.v2f32.i32(i32 {{.*}}) -; SHADERTEST: %{{[0-9]*}} = call <4 x half> (...) @lgc.input.import.interpolated.v4f16(i1 false, i32 0, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) +; SHADERTEST: %{{[0-9]*}} = call <4 x half> (...) 
@lgc.input.import.interpolated__v4f16(i1 false, i32 0, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) ; SHADERTEST: = call <2 x float> @lgc.input.import.builtin.SamplePosOffset.v2f32.i32.i32( ; SHADERTEST: = call <3 x float> @lgc.input.import.builtin.InterpPullMode ; SHADERTEST-COUNT-12: = call i32 @llvm.amdgcn.mov.dpp.i32(i32 diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateAtOffset_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateAtOffset_lit.frag index 415d389b43..4c6dede97e 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateAtOffset_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateAtOffset_lit.frag @@ -27,7 +27,7 @@ void main() ; SHADERTEST-LABEL: {{^// LLPC}} pipeline before-patching results ; SHADERTEST: = call <3 x float> @lgc.input.import.builtin.InterpPullMode ; SHADERTEST-COUNT-12: = call i32 @llvm.amdgcn.mov.dpp.i32(i32 -; SHADERTEST: = call float (...) @lgc.input.import.interpolated.f32( +; SHADERTEST: = call float (...) 
@lgc.input.import.interpolated__f32( ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST: %{{[0-9]*}} = call float @llvm.amdgcn.interp.p1(float %{{.*}}, i32 immarg 0, i32 immarg 0, i32 %{{.*}}) ; SHADERTEST: %{{[0-9]*}} = call float @llvm.amdgcn.interp.p2(float %{{.*}}, float %{{.*}}, i32 immarg 0, i32 immarg 0, i32 %{{.*}}) diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateAtSample_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateAtSample_lit.frag index e1ce46503a..cf4d9dc27a 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateAtSample_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateAtSample_lit.frag @@ -28,8 +28,8 @@ void main() ; SHADERTEST-DAG: = call <2 x float> @lgc.input.import.builtin.SamplePosOffset.v2f32.i32.i32( ; SHADERTEST-DAG: = call <3 x float> @lgc.input.import.builtin.InterpPullMode.v3f32.i32( ; SHADERTEST-COUNT-12: = call i32 @llvm.amdgcn.mov.dpp.i32(i32 -; SHADERTEST-DAG: = call float (...) @lgc.input.import.interpolated.f32(i1 false, i32 0, i32 0, i32 0, i32 poison, i32 0, <2 x float> -; SHADERTEST-DAG: = call <4 x float> (...) @lgc.input.import.interpolated.v4f32(i1 false, i32 1, i32 0, i32 0, i32 poison, i32 1, i32 poison +; SHADERTEST-DAG: = call float (...) @lgc.input.import.interpolated__f32(i1 false, i32 0, i32 0, i32 0, i32 poison, i32 0, <2 x float> +; SHADERTEST-DAG: = call <4 x float> (...) 
@lgc.input.import.interpolated__v4f32(i1 false, i32 1, i32 0, i32 0, i32 poison, i32 1, i32 poison ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST: %{{[0-9]*}} = call float @llvm.amdgcn.interp.p1(float %{{.*}}, i32 immarg 0, i32 immarg 0, i32 %{{.*}}) ; SHADERTEST: %{{[0-9]*}} = call float @llvm.amdgcn.interp.p2(float %{{.*}}, float %{{.*}}, i32 immarg 0, i32 immarg 0, i32 %{{.*}}) diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx1DArray.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx1DArray.frag index 28dea225b3..37a74c8f82 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx1DArray.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx1DArray.frag @@ -19,10 +19,10 @@ void main() ; SHADERTEST-LABEL: {{^// LLPC}} pipeline before-patching results ; SHADERTEST: = call <3 x float> @lgc.input.import.builtin.InterpPullMode ; SHADERTEST-COUNT-12: = call i32 @llvm.amdgcn.mov.dpp.i32(i32 -; SHADERTEST-DAG: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated.v4f32(i1 false, i32 17, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) -; SHADERTEST-DAG: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated.v4f32(i1 false, i32 18, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) -; SHADERTEST-DAG: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated.v4f32(i1 false, i32 19, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) -; SHADERTEST-DAG: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated.v4f32(i1 false, i32 20, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) +; SHADERTEST-DAG: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated__v4f32(i1 false, i32 17, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) +; SHADERTEST-DAG: %{{[0-9]*}} = call <4 x float> (...) 
@lgc.input.import.interpolated__v4f32(i1 false, i32 18, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) +; SHADERTEST-DAG: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated__v4f32(i1 false, i32 19, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) +; SHADERTEST-DAG: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated__v4f32(i1 false, i32 20, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) ; SHADERTEST: AMDLLPC SUCCESS */ // END_SHADERTEST diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx1DArrayInStruct.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx1DArrayInStruct.frag index 612361a1b0..833bf94d38 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx1DArrayInStruct.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx1DArrayInStruct.frag @@ -30,11 +30,11 @@ void main() ; SHADERTEST-LABEL: {{^// LLPC}} pipeline before-patching results ; SHADERTEST: = call <3 x float> @lgc.input.import.builtin.InterpPullMode ; SHADERTEST-COUNT-12: = call i32 @llvm.amdgcn.mov.dpp.i32(i32 -; SHADERTEST-DAG: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated.v4f32(i1 false, i32 3, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) -; SHADERTEST-DAG: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated.v4f32(i1 false, i32 4, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) -; SHADERTEST-DAG: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated.v4f32(i1 false, i32 5, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) -; SHADERTEST-DAG: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated.v4f32(i1 false, i32 6, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) -; SHADERTEST-DAG: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated.v4f32(i1 false, i32 7, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) +; SHADERTEST-DAG: %{{[0-9]*}} = call <4 x float> (...) 
@lgc.input.import.interpolated__v4f32(i1 false, i32 3, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) +; SHADERTEST-DAG: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated__v4f32(i1 false, i32 4, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) +; SHADERTEST-DAG: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated__v4f32(i1 false, i32 5, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) +; SHADERTEST-DAG: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated__v4f32(i1 false, i32 6, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) +; SHADERTEST-DAG: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated__v4f32(i1 false, i32 7, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) ; SHADERTEST: AMDLLPC SUCCESS */ // END_SHADERTEST diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx1DStructArray.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx1DStructArray.frag index 3182e71734..8232f9884e 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx1DStructArray.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx1DStructArray.frag @@ -30,9 +30,9 @@ void main() ; SHADERTEST-LABEL: {{^// LLPC}} pipeline before-patching results ; SHADERTEST: = call <3 x float> @lgc.input.import.builtin.InterpPullMode ; SHADERTEST-COUNT-12: = call i32 @llvm.amdgcn.mov.dpp.i32(i32 -; SHADERTEST-DAG: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated.v4f32(i1 false, i32 3, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) -; SHADERTEST-DAG: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated.v4f32(i1 false, i32 7, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) -; SHADERTEST-DAG: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated.v4f32(i1 false, i32 11, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) +; SHADERTEST-DAG: %{{[0-9]*}} = call <4 x float> (...) 
@lgc.input.import.interpolated__v4f32(i1 false, i32 3, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) +; SHADERTEST-DAG: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated__v4f32(i1 false, i32 7, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) +; SHADERTEST-DAG: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated__v4f32(i1 false, i32 11, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) ; SHADERTEST: AMDLLPC SUCCESS */ // END_SHADERTEST diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx2DArrayInStruct.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx2DArrayInStruct.frag index 7eda965c35..dd893dd2b6 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx2DArrayInStruct.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx2DArrayInStruct.frag @@ -32,12 +32,12 @@ void main() ; SHADERTEST-LABEL: {{^// LLPC}} pipeline before-patching results ; SHADERTEST: = call <3 x float> @lgc.input.import.builtin.InterpPullMode ; SHADERTEST-COUNT-12: = call i32 @llvm.amdgcn.mov.dpp.i32(i32 -; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated.v4f32(i1 false, i32 2, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) -; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated.v4f32(i1 false, i32 3, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) -; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated.v4f32(i1 false, i32 4, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) -; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated.v4f32(i1 false, i32 5, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) -; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated.v4f32(i1 false, i32 6, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) -; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) 
@lgc.input.import.interpolated.v4f32(i1 false, i32 7, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) +; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated__v4f32(i1 false, i32 2, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) +; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated__v4f32(i1 false, i32 3, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) +; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated__v4f32(i1 false, i32 4, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) +; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated__v4f32(i1 false, i32 5, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) +; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated__v4f32(i1 false, i32 6, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) +; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated__v4f32(i1 false, i32 7, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) ; SHADERTEST: AMDLLPC SUCCESS */ // END_SHADERTEST diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx2DArrayInStructInArray.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx2DArrayInStructInArray.frag index e6942b927e..269ec31503 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx2DArrayInStructInArray.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx2DArrayInStructInArray.frag @@ -31,18 +31,18 @@ void main() ; SHADERTEST-LABEL: {{^// LLPC}} pipeline before-patching results ; SHADERTEST: = call <3 x float> @lgc.input.import.builtin.InterpPullMode ; SHADERTEST-COUNT-12: = call i32 @llvm.amdgcn.mov.dpp.i32(i32 -; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated.v4f32(i1 false, i32 12, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) -; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) 
@lgc.input.import.interpolated.v4f32(i1 false, i32 13, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) -; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated.v4f32(i1 false, i32 14, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) -; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated.v4f32(i1 false, i32 15, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) -; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated.v4f32(i1 false, i32 16, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) -; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated.v4f32(i1 false, i32 17, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) -; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated.v4f32(i1 false, i32 21, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) -; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated.v4f32(i1 false, i32 22, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) -; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated.v4f32(i1 false, i32 23, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) -; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated.v4f32(i1 false, i32 24, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) -; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated.v4f32(i1 false, i32 25, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) -; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated.v4f32(i1 false, i32 26, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) +; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated__v4f32(i1 false, i32 12, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) +; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) 
@lgc.input.import.interpolated__v4f32(i1 false, i32 13, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) +; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated__v4f32(i1 false, i32 14, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) +; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated__v4f32(i1 false, i32 15, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) +; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated__v4f32(i1 false, i32 16, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) +; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated__v4f32(i1 false, i32 17, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) +; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated__v4f32(i1 false, i32 21, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) +; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated__v4f32(i1 false, i32 22, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) +; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated__v4f32(i1 false, i32 23, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) +; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated__v4f32(i1 false, i32 24, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) +; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated__v4f32(i1 false, i32 25, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) +; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) 
@lgc.input.import.interpolated__v4f32(i1 false, i32 26, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) ; SHADERTEST: AMDLLPC SUCCESS */ // END_SHADERTEST diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx2DStructArray.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx2DStructArray.frag index 0babae22c7..c66f4ac4c6 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx2DStructArray.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx2DStructArray.frag @@ -29,12 +29,12 @@ void main() ; SHADERTEST-LABEL: {{^// LLPC}} pipeline before-patching results ; SHADERTEST: = call <3 x float> @lgc.input.import.builtin.InterpPullMode ; SHADERTEST-COUNT-12: = call i32 @llvm.amdgcn.mov.dpp.i32(i32 -; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated.v4f32(i1 false, i32 2, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) -; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated.v4f32(i1 false, i32 5, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) -; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated.v4f32(i1 false, i32 8, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) -; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated.v4f32(i1 false, i32 11, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) -; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated.v4f32(i1 false, i32 14, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) -; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated.v4f32(i1 false, i32 17, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) +; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated__v4f32(i1 false, i32 2, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) +; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) 
@lgc.input.import.interpolated__v4f32(i1 false, i32 5, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) +; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated__v4f32(i1 false, i32 8, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) +; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated__v4f32(i1 false, i32 11, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) +; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated__v4f32(i1 false, i32 14, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) +; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated__v4f32(i1 false, i32 17, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) ; SHADERTEST: AMDLLPC SUCCESS */ // END_SHADERTEST diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx3DArray.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx3DArray.frag index a00e063d34..8638a7e063 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx3DArray.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx3DArray.frag @@ -20,18 +20,18 @@ void main() ; SHADERTEST-LABEL: {{^// LLPC}} pipeline before-patching results ; SHADERTEST: = call <3 x float> @lgc.input.import.builtin.InterpPullMode ; SHADERTEST-COUNT-12: = call i32 @llvm.amdgcn.mov.dpp.i32(i32 -; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated.v4f32(i1 false, i32 3, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) -; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated.v4f32(i1 false, i32 4, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) -; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated.v4f32(i1 false, i32 5, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) -; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) 
@lgc.input.import.interpolated.v4f32(i1 false, i32 6, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) -; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated.v4f32(i1 false, i32 7, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) -; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated.v4f32(i1 false, i32 8, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) -; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated.v4f32(i1 false, i32 9, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) -; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated.v4f32(i1 false, i32 10, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) -; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated.v4f32(i1 false, i32 11, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) -; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated.v4f32(i1 false, i32 12, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) -; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated.v4f32(i1 false, i32 13, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) -; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated.v4f32(i1 false, i32 14, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) +; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated__v4f32(i1 false, i32 3, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) +; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated__v4f32(i1 false, i32 4, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) +; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated__v4f32(i1 false, i32 5, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) +; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) 
@lgc.input.import.interpolated__v4f32(i1 false, i32 6, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) +; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated__v4f32(i1 false, i32 7, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) +; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated__v4f32(i1 false, i32 8, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) +; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated__v4f32(i1 false, i32 9, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) +; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated__v4f32(i1 false, i32 10, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) +; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated__v4f32(i1 false, i32 11, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) +; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated__v4f32(i1 false, i32 12, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) +; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) @lgc.input.import.interpolated__v4f32(i1 false, i32 13, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) +; SHADERTEST: %{{[0-9]*}} = call <4 x float> (...) 
@lgc.input.import.interpolated__v4f32(i1 false, i32 14, i32 0, i32 0, i32 poison, i32 0, <2 x float> %{{.*}}) ; SHADERTEST: AMDLLPC SUCCESS */ // END_SHADERTEST diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdxVector.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdxVector.frag index e5ba13d02c..868061bc8c 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdxVector.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdxVector.frag @@ -21,15 +21,15 @@ void main() ; SHADERTEST-DAG: = call <2 x float> @lgc.input.import.builtin.SamplePosOffset.v2f32.i32.i32( ; SHADERTEST-DAG: = call <3 x float> @lgc.input.import.builtin.InterpPullMode.v3f32.i32( ; SHADERTEST-COUNT-12: = call i32 @llvm.amdgcn.mov.dpp.i32(i32 -; SHADERTEST-DAG: = call <2 x float> (...) @lgc.input.import.interpolated.v2f32(i1 false, i32 0, i32 0, i32 0, i32 poison, i32 0, +; SHADERTEST-DAG: = call <2 x float> (...) @lgc.input.import.interpolated__v2f32(i1 false, i32 0, i32 0, i32 0, i32 poison, i32 0, ; SHADERTEST-DAG: = call <2 x float> @lgc.input.import.builtin.SamplePosOffset.v2f32.i32.i32( ; SHADERTEST-DAG: = call <3 x float> @lgc.input.import.builtin.InterpPullMode.v3f32.i32( ; SHADERTEST-COUNT-12: = call i32 @llvm.amdgcn.mov.dpp.i32(i32 -; SHADERTEST-DAG: = call float (...) @lgc.input.import.interpolated.f32(i1 false, i32 1, i32 0, i32 0, i32 poison, i32 0, <2 x float> +; SHADERTEST-DAG: = call float (...) @lgc.input.import.interpolated__f32(i1 false, i32 1, i32 0, i32 0, i32 poison, i32 0, <2 x float> ; SHADERTEST-DAG: = call <2 x float> @lgc.input.import.builtin.SamplePosOffset.v2f32.i32.i32( ; SHADERTEST-DAG: = call <3 x float> @lgc.input.import.builtin.InterpPullMode.v3f32.i32( ; SHADERTEST-COUNT-12: = call i32 @llvm.amdgcn.mov.dpp.i32(i32 -; SHADERTEST-DAG: = call float (...) @lgc.input.import.interpolated.f32(i1 false, i32 2, i32 0, i32 0, i32 poison, i32 0, <2 x float> +; SHADERTEST-DAG: = call float (...) 
@lgc.input.import.interpolated__f32(i1 false, i32 2, i32 0, i32 0, i32 poison, i32 0, <2 x float> ; SHADERTEST: AMDLLPC SUCCESS */ // END_SHADERTEST diff --git a/llpc/test/shaderdb/general/PipelineTaskMesh_LdsVariables.pipe b/llpc/test/shaderdb/general/PipelineTaskMesh_LdsVariables.pipe new file mode 100644 index 0000000000..239df98502 --- /dev/null +++ b/llpc/test/shaderdb/general/PipelineTaskMesh_LdsVariables.pipe @@ -0,0 +1,133 @@ +; RUN: amdllpc -v -gfxip 11.0 -o /dev/null %s | FileCheck -check-prefix=SHADERTEST %s +; SHADERTEST: AMDLLPC SUCCESS + +; Test that we are able to build a pipeline in which task and mesh shaders have LDS variables of the same name + +[Version] +version = 70 + +[TaskGlsl] +#version 460 +#extension GL_EXT_mesh_shader : require + +layout(local_size_x = 32) in; + +taskPayloadSharedEXT struct { + vec4 offset; +} payload; + +shared vec4 lds[32]; + +void main() +{ + const uint thread_id = gl_LocalInvocationID.x; + + lds[thread_id] = vec4(thread_id, 0, 0, 0); + + barrier(); + + payload.offset = vec4(4, 0, 0, 0); + + EmitMeshTasksEXT(int(lds[15]), 1, 1); +} + +[TaskInfo] +entryPoint = main + +[MeshGlsl] +#version 460 +#extension GL_EXT_mesh_shader : require + +layout(local_size_x = 32) in; +layout(triangles, max_vertices = 32, max_primitives = 32) out; + +taskPayloadSharedEXT struct { + vec4 offset; +} payload; + +shared vec4 lds[32]; + +void main() +{ + const uint thread_id = gl_LocalInvocationID.x; + + lds[thread_id ^ 1] = vec4(thread_id, 0, 0, 0) + payload.offset; + + barrier(); + + SetMeshOutputsEXT(32, 32); + + gl_MeshVerticesEXT[thread_id].gl_Position = lds[thread_id]; + gl_PrimitiveTriangleIndicesEXT[thread_id] = uvec3(thread_id % 32, (thread_id + 1) % 32, (thread_id + 2) % 32); +} + +[MeshInfo] +entryPoint = main + +[FsGlsl] +#version 460 + +layout(location = 0) out vec4 frag_color; + +void main() { + frag_color = vec4(0.0, 1.0, 0.0, 1.0); +} + +[FsInfo] +entryPoint = main + +[ResourceMapping] +userDataNode[0].visibility = 2 
+userDataNode[0].type = IndirectUserDataVaPtr +userDataNode[0].offsetInDwords = 0 +userDataNode[0].sizeInDwords = 1 +userDataNode[0].indirectUserDataCount = 0 +userDataNode[1].visibility = 97 +userDataNode[1].type = PushConst +userDataNode[1].offsetInDwords = 1 +userDataNode[1].sizeInDwords = 16 +userDataNode[1].set = 0xFFFFFFFF +userDataNode[1].binding = 0 +userDataNode[1].strideInDwords = 0 +userDataNode[2].visibility = 32 +userDataNode[2].type = DescriptorTableVaPtr +userDataNode[2].offsetInDwords = 17 +userDataNode[2].sizeInDwords = 1 +userDataNode[2].next[0].type = DescriptorBuffer +userDataNode[2].next[0].offsetInDwords = 0 +userDataNode[2].next[0].sizeInDwords = 4 +userDataNode[2].next[0].set = 0x00000000 +userDataNode[2].next[0].binding = 1 +userDataNode[2].next[0].strideInDwords = 0 +userDataNode[2].next[1].type = DescriptorBuffer +userDataNode[2].next[1].offsetInDwords = 4 +userDataNode[2].next[1].sizeInDwords = 4 +userDataNode[2].next[1].set = 0x00000000 +userDataNode[2].next[1].binding = 2 +userDataNode[2].next[1].strideInDwords = 0 +userDataNode[2].next[2].type = DescriptorBuffer +userDataNode[2].next[2].offsetInDwords = 8 +userDataNode[2].next[2].sizeInDwords = 4 +userDataNode[2].next[2].set = 0x00000000 +userDataNode[2].next[2].binding = 3 +userDataNode[2].next[2].strideInDwords = 0 +userDataNode[2].next[3].type = DescriptorBuffer +userDataNode[2].next[3].offsetInDwords = 12 +userDataNode[2].next[3].sizeInDwords = 4 +userDataNode[2].next[3].set = 0x00000000 +userDataNode[2].next[3].binding = 4 +userDataNode[2].next[3].strideInDwords = 0 + +[GraphicsPipelineState] +topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST +provokingVertexMode = VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT +numSamples = 1 +colorBuffer[0].format = VK_FORMAT_B8G8R8A8_UNORM +colorBuffer[0].channelWriteMask = 15 +colorBuffer[0].blendEnable = 0 +colorBuffer[0].blendSrcAlphaToColor = 1 +nggState.enableNgg = 1 +nggState.subgroupSizing = Auto +nggState.primsPerSubgroup = 256 
+nggState.vertsPerSubgroup = 256 +options.scalarBlockLayout = 1 diff --git a/llpc/test/shaderdb/general/PipelineVsFs_DynamicSampleInfo.pipe b/llpc/test/shaderdb/general/PipelineVsFs_DynamicSampleInfo.pipe index ecf1098cf3..5373eb6a71 100644 --- a/llpc/test/shaderdb/general/PipelineVsFs_DynamicSampleInfo.pipe +++ b/llpc/test/shaderdb/general/PipelineVsFs_DynamicSampleInfo.pipe @@ -140,9 +140,9 @@ attribute[1].offset = 16 ; SHADERTEST-SAME: (i32 inreg noundef [[GLOBALTABLE:%.*]], i32 inreg noundef [[SAMPLEINFO:%.*]], i32 inreg noundef [[PRIMMASK:%.*]], <2 x float> noundef [[PERSPINTERPSAMPLE:%.*]], <2 x float> noundef [[PERSPINTERPCENTER:%.*]], <2 x float> noundef [[PERSPINTERPCENTROID:%.*]], <3 x float> noundef [[PERSPINTERPPULLMODE:%.*]], <2 x float> noundef [[LINEARINTERPSAMPLE:%.*]], <2 x float> noundef [[LINEARINTERPCENTER:%.*]], <2 x float> noundef [[LINEARINTERPCENTROID:%.*]], float noundef [[LINESTIPPLE:%.*]], float noundef [[FRAGCOORDX:%.*]], float noundef [[FRAGCOORDY:%.*]], float noundef [[FRAGCOORDZ:%.*]], float noundef [[FRAGCOORDW:%.*]], i32 noundef [[FRONTFACING:%.*]], i32 noundef [[ANCILLARY:%.*]], i32 noundef [[SAMPLECOVERAGE:%.*]], i32 noundef [[FIXEDXY:%.*]]) #[[ATTR1:[0-9]+]] !lgc.shaderstage !14 { ; SHADERTEST-NEXT: .entry: ; SHADERTEST-NEXT: [[INTERPPERSPSAMPLE:%.*]] = call <2 x float> @lgc.input.import.builtin.InterpPerspSample.v2f32.i32(i32 268435456) #[[ATTR3:[0-9]+]] -; SHADERTEST-NEXT: [[TMP0:%.*]] = call float (...) @lgc.input.import.interpolated.f32(i1 false, i32 1, i32 0, i32 0, i32 poison, i32 0, <2 x float> [[INTERPPERSPSAMPLE]]) +; SHADERTEST-NEXT: [[TMP0:%.*]] = call float (...) @lgc.input.import.interpolated__f32(i1 false, i32 1, i32 0, i32 0, i32 poison, i32 0, <2 x float> [[INTERPPERSPSAMPLE]]) ; SHADERTEST-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i64 0 -; SHADERTEST-NEXT: [[TMP2:%.*]] = call float (...) 
@lgc.input.import.interpolated.f32(i1 false, i32 1, i32 0, i32 1, i32 poison, i32 0, <2 x float> [[INTERPPERSPSAMPLE]]) +; SHADERTEST-NEXT: [[TMP2:%.*]] = call float (...) @lgc.input.import.interpolated__f32(i1 false, i32 1, i32 0, i32 1, i32 poison, i32 0, <2 x float> [[INTERPPERSPSAMPLE]]) ; SHADERTEST-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP1]], float [[TMP2]], i64 1 ; SHADERTEST-NEXT: [[SAMPLEPOSITION:%.*]] = call <2 x float> @lgc.input.import.builtin.SamplePosition.v2f32.i32(i32 19) #[[ATTR3]] ; SHADERTEST-NEXT: [[TMP4:%.*]] = fadd reassoc nnan nsz arcp contract afn <2 x float> [[SAMPLEPOSITION]], @@ -217,9 +217,9 @@ attribute[1].offset = 16 ; SHADERTEST-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <2 x float> poison, float [[TMP67]], i64 0 ; SHADERTEST-NEXT: [[DOTSPLAT4:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT3]], <2 x float> poison, <2 x i32> zeroinitializer ; SHADERTEST-NEXT: [[TMP68:%.*]] = fmul <2 x float> [[TMP65]], [[DOTSPLAT4]] -; SHADERTEST-NEXT: [[TMP69:%.*]] = call float (...) @lgc.input.import.interpolated.f32(i1 false, i32 0, i32 0, i32 0, i32 poison, i32 0, <2 x float> [[TMP68]]) +; SHADERTEST-NEXT: [[TMP69:%.*]] = call float (...) @lgc.input.import.interpolated__f32(i1 false, i32 0, i32 0, i32 0, i32 poison, i32 0, <2 x float> [[TMP68]]) ; SHADERTEST-NEXT: [[TMP70:%.*]] = insertelement <2 x float> poison, float [[TMP69]], i64 0 -; SHADERTEST-NEXT: [[TMP71:%.*]] = call float (...) @lgc.input.import.interpolated.f32(i1 false, i32 0, i32 0, i32 1, i32 poison, i32 0, <2 x float> [[TMP68]]) +; SHADERTEST-NEXT: [[TMP71:%.*]] = call float (...) 
@lgc.input.import.interpolated__f32(i1 false, i32 0, i32 0, i32 1, i32 poison, i32 0, <2 x float> [[TMP68]]) ; SHADERTEST-NEXT: [[TMP72:%.*]] = insertelement <2 x float> [[TMP70]], float [[TMP71]], i64 1 ; SHADERTEST-NEXT: [[TMP73:%.*]] = fsub reassoc nnan nsz arcp contract afn <2 x float> [[TMP72]], [[TMP3]] ; SHADERTEST-NEXT: [[TMP74:%.*]] = call reassoc nnan nsz arcp contract afn <2 x float> @llvm.fabs.v2f32(<2 x float> [[TMP73]]) diff --git a/llpc/test/shaderdb/general/PipelineVsFs_TestIgnoreDynamicDualSourceBlendEnable.pipe b/llpc/test/shaderdb/general/PipelineVsFs_TestIgnoreDynamicDualSourceBlendEnable.pipe new file mode 100644 index 0000000000..adfd99480d --- /dev/null +++ b/llpc/test/shaderdb/general/PipelineVsFs_TestIgnoreDynamicDualSourceBlendEnable.pipe @@ -0,0 +1,176 @@ + +; BEGIN_SHADERTEST +; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s +; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results +; SHADERTEST: call void @llvm.amdgcn.exp.f32(i32 immarg 0 +; SHADERTEST-NEXT: call void @llvm.amdgcn.exp.f32(i32 immarg 1 +; SHADERTEST-NEXT: call void @llvm.amdgcn.exp.f32(i32 immarg 2 +; END_SHADERTEST + +[VsGlsl] +#version 450 + +layout(location = 0) out vec2 uv; + +void main() +{ + switch (gl_VertexIndex) + { + case 0: + { + gl_Position = vec4(-1.0, 1.0, 0.0, 1.0); + uv = vec2(0.0, 1.0); + break; + } + case 1: + { + gl_Position = vec4(1.0, 1.0, 0.0, 1.0); + uv = vec2(1.0); + break; + } + case 2: + { + gl_Position = vec4(-1.0, -1.0, 0.0, 1.0); + uv = vec2(0.0); + break; + } + case 3: + { + gl_Position = vec4(1.0, -1.0, 0.0, 1.0); + uv = vec2(1.0, 0.0); + break; + } + } +} + +[VsInfo] +entryPoint = main +userDataNode[0].type = IndirectUserDataVaPtr +userDataNode[0].offsetInDwords = 0 +userDataNode[0].sizeInDwords = 1 +userDataNode[0].indirectUserDataCount = 0 + +trapPresent = 0 +debugMode = 0 +enablePerformanceData = 0 +vgprLimit = 0 +sgprLimit = 0 +maxThreadGroupsPerComputeUnit = 0 + +[FsGlsl] +#version 450 + +layout(location = 0) in 
vec2 uv; +layout(location = 0) out vec4 color0; +layout(location = 1) out vec4 color1; +layout(location = 2) out vec4 color2; + +float repeat(float f, float count) +{ + return mod(count * f, 1.0); +} + +float quantize(float f, float steps) +{ + return trunc(steps * f) / (steps - 1.0); +} + +void main() +{ + float param = uv.x; + float param_1 = 3.0; + float param_2 = uv.y; + float param_3 = 3.0; + vec2 tile_uv = vec2(repeat(param, param_1), repeat(param_2, param_3)); + float s = uv.x; + float param_4 = uv.y; + float param_5 = 3.0; + float c = quantize(param_4, param_5); + float param_6 = tile_uv.x; + float param_7 = 3.0; + float a = quantize(param_6, param_7); + if (s < 0.3333333432674407958984375) + { + color0 = vec4(c, 0.0, 0.0, a); + color2 = vec4(c, 0.0, 0.0, a); + } + else + { + if (s < 0.666666686534881591796875) + { + color0 = vec4(0.0, c, 0.0, a); + color2 = vec4(0.0, c, 0.0, a); + } + else + { + color0 = vec4(0.0, 0.0, c, a); + color2 = vec4(0.0, 0.0, c, a); + } + } + s = tile_uv.y; + float param_8 = tile_uv.x; + float param_9 = 3.0; + float param_10 = repeat(param_8, param_9); + float param_11 = 3.0; + c = quantize(param_10, param_11); + float param_12 = tile_uv.y; + float param_13 = 3.0; + float param_14 = repeat(param_12, param_13); + float param_15 = 3.0; + a = quantize(param_14, param_15); + color1 = vec4(0.0); + if (s < 0.3333333432674407958984375) + { + color1 += vec4(c, 0.0, 0.0, a); + } + else + { + if (s < 0.666666686534881591796875) + { + color1 += vec4(0.0, c, 0.0, a); + } + else + { + color1 += vec4(0.0, 0.0, c, a); + } + } + if (gl_SampleID == 0) + { + color0 = vec4(0.5); + color1 = vec4(0.5); + color2 = vec4(0.5); + } +} + +[FsInfo] +entryPoint = main +trapPresent = 0 +debugMode = 0 +enablePerformanceData = 0 +vgprLimit = 0 +sgprLimit = 0 +maxThreadGroupsPerComputeUnit = 0 + +[GraphicsPipelineState] +topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP +patchControlPoints = 0 +deviceIndex = 0 +disableVertexReuse = 0 +switchWinding = 0 
+enableMultiView = 0 +depthClipEnable = 1 +rasterizerDiscardEnable = 0 +perSampleShading = 1 +numSamples = 8 +samplePatternIdx = 48 +usrClipPlaneMask = 0 +includeDisassembly = 0 +alphaToCoverageEnable = 0 +dualSourceBlendEnable = 0 +dualSourceBlendDynamic = 1 +colorBuffer[0].format = VK_FORMAT_R32G32B32A32_SFLOAT +colorBuffer[0].channelWriteMask = 15 +colorBuffer[0].blendEnable = 1 +colorBuffer[0].blendSrcAlphaToColor = 1 +colorBuffer[1].format = VK_FORMAT_R32G32B32A32_SFLOAT +colorBuffer[2].format = VK_FORMAT_R32G32B32A32_SFLOAT diff --git a/llpc/test/shaderdb/general/PipelineVsFs_TestIndirectResourceLayout.pipe b/llpc/test/shaderdb/general/PipelineVsFs_TestIndirectResourceLayout.pipe index ef2290dc69..71dc89747d 100644 --- a/llpc/test/shaderdb/general/PipelineVsFs_TestIndirectResourceLayout.pipe +++ b/llpc/test/shaderdb/general/PipelineVsFs_TestIndirectResourceLayout.pipe @@ -8,7 +8,7 @@ ; SHADERTEST: call void (...) @lgc.create.write.generic.output(<4 x float> [[Value]], i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline before-patching results -; SHADERTEST: [[DescLo:%[0-9]*]] = call i32 @lgc.load.user.data.i32(i32 4) +; SHADERTEST: [[DescLo:%[0-9]*]] = call i32 @lgc.load.user.data__i32(i32 4) ; SHADERTEST: [[DescVec:%[0-9]*]] = insertelement <2 x i32> %{{[^,]*}}, i32 [[DescLo]], i64 0 ; SHADERTEST: [[Desc64:%[0-9]*]] = bitcast <2 x i32> [[DescVec]] to i64 ; SHADERTEST: [[Desc:%[0-9]*]] = inttoptr i64 [[Desc64]] to ptr addrspace(4) diff --git a/llpc/test/shaderdb/general/TestComponentIndexing.tese b/llpc/test/shaderdb/general/TestComponentIndexing.tese new file mode 100644 index 0000000000..fc899bcf73 --- /dev/null +++ b/llpc/test/shaderdb/general/TestComponentIndexing.tese @@ -0,0 +1,84 @@ +// BEGIN_SHADERTEST +// This test is to verify input mapping of TES when component indexing is encountered. 
In such +// case, we are supposed to reserve all components of locations corresponding to a TES input +// in the location info mapping table. We also have to take component offset, specified by +// 'component' qualifier, into consideration and reserve enough components for such indexing. +// RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s + +// SHADERTEST-LABEL: {{^// LLPC}} location input/output mapping results (TES) +// layout(location = 0, component = 1) in vec3 i0[] +// SHADERTEST: (TES) Input: [location, component] = [0, 0] => Mapped = [0, 0] +// SHADERTEST: (TES) Input: [location, component] = [0, 1] => Mapped = [0, 1] +// SHADERTEST: (TES) Input: [location, component] = [0, 2] => Mapped = [0, 2] +// SHADERTEST: (TES) Input: [location, component] = [0, 3] => Mapped = [0, 3] +// layout(location = 1, component = 1) in vec2 i1[] +// SHADERTEST: (TES) Input: [location, component] = [1, 0] => Mapped = [1, 0] +// SHADERTEST: (TES) Input: [location, component] = [1, 1] => Mapped = [1, 1] +// SHADERTEST: (TES) Input: [location, component] = [1, 2] => Mapped = [1, 2] +// layout(location = 2) in dvec4 i2[] +// SHADERTEST: (TES) Input: [location, component] = [2, 0] => Mapped = [2, 0] +// SHADERTEST: (TES) Input: [location, component] = [2, 1] => Mapped = [2, 1] +// SHADERTEST: (TES) Input: [location, component] = [2, 2] => Mapped = [2, 2] +// SHADERTEST: (TES) Input: [location, component] = [2, 3] => Mapped = [2, 3] +// SHADERTEST: (TES) Input: [location, component] = [3, 0] => Mapped = [3, 0] +// SHADERTEST: (TES) Input: [location, component] = [3, 1] => Mapped = [3, 1] +// SHADERTEST: (TES) Input: [location, component] = [3, 2] => Mapped = [3, 2] +// SHADERTEST: (TES) Input: [location, component] = [3, 3] => Mapped = [3, 3] +// layout(location = 4) in dvec3 i3[] +// SHADERTEST: (TES) Input: [location, component] = [4, 0] => Mapped = [4, 0] +// SHADERTEST: (TES) Input: [location, component] = [4, 1] => Mapped = [4, 1] +// SHADERTEST: (TES) Input: 
[location, component] = [4, 2] => Mapped = [4, 2] +// SHADERTEST: (TES) Input: [location, component] = [4, 3] => Mapped = [4, 3] +// SHADERTEST: (TES) Input: [location, component] = [5, 0] => Mapped = [5, 0] +// SHADERTEST: (TES) Input: [location, component] = [5, 1] => Mapped = [5, 1] + +// layout(location = 0) out vec3 o0 +// SHADERTEST: (TES) Output: [location, component] = [0, 0] => Mapped = [0, 0] +// layout(location = 1) out vec2 o1 +// SHADERTEST: (TES) Output: [location, component] = [1, 0] => Mapped = [1, 0] +// layout(location = 2) out dvec4 o2 +// SHADERTEST: (TES) Output: [location, component] = [2, 0] => Mapped = [2, 0] +// SHADERTEST: (TES) Output: [location, component] = [3, 0] => Mapped = [3, 0] +// layout(location = 4) out dvec3 o3 +// SHADERTEST: (TES) Output: [location, component] = [4, 0] => Mapped = [4, 0] +// SHADERTEST: (TES) Output: [location, component] = [5, 0] => Mapped = [5, 0] + +// SHADERTEST: AMDLLPC SUCCESS +// END_SHADERTEST + +#version 450 core + +layout(triangles) in; + +layout(location = 0, component = 1) in vec3 i0[]; +layout(location = 1, component = 1) in vec2 i1[]; +layout(location = 2) in dvec4 i2[]; +layout(location = 4) in dvec3 i3[]; + +layout(location = 0) out vec3 o0; +layout(location = 1) out vec2 o1; +layout(location = 2) out dvec4 o2; +layout(location = 4) out dvec3 o3; + +layout(binding = 0) uniform Uniform { + uint index; +}; + +void main (void) { + // Constant component indexing + o0.x = i0[0][0]; + o0.y = i0[1][1]; + o0.z = i0[2][2]; + + // Dynamic component indexing + o1[index] = i1[index][index]; + + // 64-bit constant component indexing + o2[0] = i2[0][3]; + o2[1] = i2[1][2]; + o2[2] = i2[2][1]; + o2[3] = i2[0][0]; + + // 64-bit dynamic component indexing + o3[index] = i3[index][index]; +} \ No newline at end of file diff --git a/llpc/test/shaderdb/gfx11/AttributePrecedesPos.pipe b/llpc/test/shaderdb/gfx11/AttributePrecedesPos.pipe index 9a2e5ce890..7aa05e7a74 100644 --- 
a/llpc/test/shaderdb/gfx11/AttributePrecedesPos.pipe +++ b/llpc/test/shaderdb/gfx11/AttributePrecedesPos.pipe @@ -4,8 +4,8 @@ ; SHADERTEST-LABEL: @_amdgpu_gs_main( ; SHADERTEST: call void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float> %{{.*}}, <4 x i32> %{{.*}}, i32 %{{.*}}, i32 0, i32 %{{.*}}, i32 3) ; SHADERTEST: fence release -; SHADERTEST: call void @llvm.amdgcn.exp.f32(i32 immarg 12, i32 immarg 15, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, i1 immarg false, i1 immarg false) -; SHADERTEST: call void @llvm.amdgcn.exp.f32(i32 immarg 13, i32 immarg 1, float 1.000000e+00, float poison, float poison, float poison, i1 immarg true, i1 immarg false) +; SHADERTEST: call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, i1 false, i1 false) +; SHADERTEST: call void @llvm.amdgcn.exp.f32(i32 13, i32 1, float 1.000000e+00, float poison, float poison, float poison, i1 true, i1 false) ; SHADERTEST-LABEL: _amdgpu_gs_main: ; SHADERTEST: buffer_store_b128 {{v[[0-9]*:[0-9]*]}}, {{v[0-9]*}}, {{s[[0-9]*:[0-9]*]}}, {{s[0-9]*}} idxen glc slc ; SHADERTEST: s_waitcnt_vscnt null, 0x0 diff --git a/llpc/test/shaderdb/object/ObjInput_TestFsBasic_lit.frag b/llpc/test/shaderdb/object/ObjInput_TestFsBasic_lit.frag index f5b5584e9c..f9af92c4c7 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestFsBasic_lit.frag +++ b/llpc/test/shaderdb/object/ObjInput_TestFsBasic_lit.frag @@ -19,9 +19,9 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results -; SHADERTEST-DAG: call <2 x i32> (...) @lgc.input.import.interpolated.v2i32{{.*}} -; SHADERTEST-DAG: call i32 (...) @lgc.input.import.interpolated.{{.*}} -; SHADERTEST-DAG: call <4 x float> (...) @lgc.input.import.interpolated.v4f32{{.*}} +; SHADERTEST-DAG: call <2 x i32> (...) 
@lgc.input.import.interpolated__v2i32{{.*}} +; SHADERTEST-DAG: call i32 (...) @lgc.input.import.interpolated__{{.*}} +; SHADERTEST-DAG: call <4 x float> (...) @lgc.input.import.interpolated__v4f32{{.*}} ; SHADERTEST: AMDLLPC SUCCESS */ // END_SHADERTEST diff --git a/llpc/test/shaderdb/object/ObjInput_TestFsCompSpecifier_lit.frag b/llpc/test/shaderdb/object/ObjInput_TestFsCompSpecifier_lit.frag index b3dfea6ce9..50e5515a40 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestFsCompSpecifier_lit.frag +++ b/llpc/test/shaderdb/object/ObjInput_TestFsCompSpecifier_lit.frag @@ -15,8 +15,8 @@ void main (void) ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results -; SHADERTEST: call <2 x float> (...) @lgc.input.import.interpolated.v2f32{{.*}} -; SHADERTEST: call float (...) @lgc.input.import.interpolated.f32{{.*}} +; SHADERTEST: call <2 x float> (...) @lgc.input.import.interpolated__v2f32{{.*}} +; SHADERTEST: call float (...) @lgc.input.import.interpolated__f32{{.*}} ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST: call float @llvm.amdgcn.interp.p1(float %{{[^,]*}}, i32 immarg 1, i32 immarg 0, i32 %PrimMask) ; SHADERTEST: call float @llvm.amdgcn.interp.p2(float %{{[^,]*}}, float %{{[^,]*}}, i32 immarg 1, i32 immarg 0, i32 %PrimMask) diff --git a/llpc/test/shaderdb/object/ObjInput_TestFsDouble_lit.frag b/llpc/test/shaderdb/object/ObjInput_TestFsDouble_lit.frag index 08b6f3ae2c..914956eddf 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestFsDouble_lit.frag +++ b/llpc/test/shaderdb/object/ObjInput_TestFsDouble_lit.frag @@ -24,10 +24,10 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results -; SHADERTEST-DAG: call <4 x double> (...) 
@lgc.input.import.interpolated.v4f64 -; SHADERTEST-DAG: call <4 x double> (...) @lgc.input.import.interpolated.v4f64 -; SHADERTEST-DAG: call <3 x double> (...) @lgc.input.import.interpolated.v3f64 -; SHADERTEST-DAG: call <3 x double> (...) @lgc.input.import.interpolated.v3f64 +; SHADERTEST-DAG: call <4 x double> (...) @lgc.input.import.interpolated__v4f64 +; SHADERTEST-DAG: call <4 x double> (...) @lgc.input.import.interpolated__v4f64 +; SHADERTEST-DAG: call <3 x double> (...) @lgc.input.import.interpolated__v3f64 +; SHADERTEST-DAG: call <3 x double> (...) @lgc.input.import.interpolated__v3f64 ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST-COUNT-28: call float @llvm.amdgcn.interp.mov ; SHADERTEST: AMDLLPC SUCCESS diff --git a/llpc/test/shaderdb/object/ObjInput_TestFsInBlock_lit.frag b/llpc/test/shaderdb/object/ObjInput_TestFsInBlock_lit.frag index e2f4c59510..e03faaf867 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestFsInBlock_lit.frag +++ b/llpc/test/shaderdb/object/ObjInput_TestFsInBlock_lit.frag @@ -29,12 +29,12 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results -; SHADERTEST-DAG: call i32 (...) @lgc.input.import.interpolated.{{.*}} -; SHADERTEST-DAG: call <3 x float> (...) @lgc.input.import.interpolated.v3f32{{.*}} -; SHADERTEST-DAG: call <4 x float> (...) @lgc.input.import.interpolated.v4f32{{.*}} -; SHADERTEST-DAG: call <4 x float> (...) @lgc.input.import.interpolated.v4f32{{.*}} -; SHADERTEST-DAG: call <4 x float> (...) @lgc.input.import.interpolated.v4f32{{.*}} -; SHADERTEST-DAG: call <4 x float> (...) @lgc.input.import.interpolated.v4f32{{.*}} +; SHADERTEST-DAG: call i32 (...) @lgc.input.import.interpolated__{{.*}} +; SHADERTEST-DAG: call <3 x float> (...) @lgc.input.import.interpolated__v3f32{{.*}} +; SHADERTEST-DAG: call <4 x float> (...) 
@lgc.input.import.interpolated__v4f32{{.*}} +; SHADERTEST-DAG: call <4 x float> (...) @lgc.input.import.interpolated__v4f32{{.*}} +; SHADERTEST-DAG: call <4 x float> (...) @lgc.input.import.interpolated__v4f32{{.*}} +; SHADERTEST-DAG: call <4 x float> (...) @lgc.input.import.interpolated__v4f32{{.*}} ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST-DAG: call float @llvm.amdgcn.interp.mov ; SHADERTEST-DAG: call float @llvm.amdgcn.interp.p1 diff --git a/llpc/test/shaderdb/object/ObjInput_TestFsInterpQualifierInBlock_lit.frag b/llpc/test/shaderdb/object/ObjInput_TestFsInterpQualifierInBlock_lit.frag index 5a362288e5..c9fce2119c 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestFsInterpQualifierInBlock_lit.frag +++ b/llpc/test/shaderdb/object/ObjInput_TestFsInterpQualifierInBlock_lit.frag @@ -23,8 +23,8 @@ void main() ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results ; SHADERTEST-DAG: call i32 (...) @lgc.input.import.interpolated{{.*}} -; SHADERTEST-DAG: call <4 x float> (...) @lgc.input.import.interpolated.v4f32{{.*}} -; SHADERTEST-DAG: call <4 x float> (...) @lgc.input.import.interpolated.v4f32{{.*}} +; SHADERTEST-DAG: call <4 x float> (...) @lgc.input.import.interpolated__v4f32{{.*}} +; SHADERTEST-DAG: call <4 x float> (...) 
@lgc.input.import.interpolated__v4f32{{.*}} ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST-DAG: call float @llvm.amdgcn.interp.mov ; SHADERTEST-DAG: call float @llvm.amdgcn.interp.p1 diff --git a/llpc/test/shaderdb/object/ObjInput_TestFsInterpQualifierOnStruct_lit.frag b/llpc/test/shaderdb/object/ObjInput_TestFsInterpQualifierOnStruct_lit.frag index 743f1586df..c8e91574e7 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestFsInterpQualifierOnStruct_lit.frag +++ b/llpc/test/shaderdb/object/ObjInput_TestFsInterpQualifierOnStruct_lit.frag @@ -22,8 +22,8 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results -; SHADERTEST: call <4 x float> (...) @lgc.input.import.interpolated.v4f32{{.*}} -; SHADERTEST: call <4 x float> (...) @lgc.input.import.interpolated.v4f32{{.*}} +; SHADERTEST: call <4 x float> (...) @lgc.input.import.interpolated__v4f32{{.*}} +; SHADERTEST: call <4 x float> (...) @lgc.input.import.interpolated__v4f32{{.*}} ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST-DAG: call float @llvm.amdgcn.interp.p1 ; SHADERTEST-DAG: call float @llvm.amdgcn.interp.p2 diff --git a/llpc/test/shaderdb/object/ObjInput_TestFsInterpQualifier_lit.frag b/llpc/test/shaderdb/object/ObjInput_TestFsInterpQualifier_lit.frag index 8094475aaa..d5dd5b613e 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestFsInterpQualifier_lit.frag +++ b/llpc/test/shaderdb/object/ObjInput_TestFsInterpQualifier_lit.frag @@ -27,7 +27,7 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results -; SHADERTEST-COUNT-7: call float (...) @lgc.input.import.interpolated.f32{{.*}} +; SHADERTEST-COUNT-7: call float (...) 
@lgc.input.import.interpolated__f32{{.*}} ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST: call float @llvm.amdgcn.interp.p1 ; SHADERTEST: call float @llvm.amdgcn.interp.p2 diff --git a/llpc/test/shaderdb/object/ObjInput_TestFsMatrixArray_lit.frag b/llpc/test/shaderdb/object/ObjInput_TestFsMatrixArray_lit.frag index 41a1f4e7fb..1be22c2a5b 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestFsMatrixArray_lit.frag +++ b/llpc/test/shaderdb/object/ObjInput_TestFsMatrixArray_lit.frag @@ -18,7 +18,7 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results -; SHADERTEST-COUNT-8: call <4 x float> (...) @lgc.input.import.interpolated.v4f32{{.*}} +; SHADERTEST-COUNT-8: call <4 x float> (...) @lgc.input.import.interpolated__v4f32{{.*}} ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST-COUNT-32: call float @llvm.amdgcn.interp.mov ; SHADERTEST: AMDLLPC SUCCESS diff --git a/llpc/test/shaderdb/object/ObjInput_TestFsMatrix_lit.frag b/llpc/test/shaderdb/object/ObjInput_TestFsMatrix_lit.frag index ec1725dfc5..fa4fd55558 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestFsMatrix_lit.frag +++ b/llpc/test/shaderdb/object/ObjInput_TestFsMatrix_lit.frag @@ -18,7 +18,7 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results -; SHADERTEST-COUNT-4: call <4 x float> (...) @lgc.input.import.interpolated.v4f32{{.*}} +; SHADERTEST-COUNT-4: call <4 x float> (...) 
@lgc.input.import.interpolated__v4f32{{.*}} ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST-COUNT-16: call float @llvm.amdgcn.interp.mov ; SHADERTEST: AMDLLPC SUCCESS diff --git a/llpc/test/shaderdb/object/ObjInput_TestFsStruct_lit.frag b/llpc/test/shaderdb/object/ObjInput_TestFsStruct_lit.frag index dba34885e0..2e984447dd 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestFsStruct_lit.frag +++ b/llpc/test/shaderdb/object/ObjInput_TestFsStruct_lit.frag @@ -30,9 +30,9 @@ void main() ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results ; SHADERTEST-DAG: call i32 (...) @lgc.input.import.interpolated{{.*}} -; SHADERTEST-DAG: call <3 x float> (...) @lgc.input.import.interpolated.v3f32{{.*}} -; SHADERTEST-DAG: call <4 x float> (...) @lgc.input.import.interpolated.v4f32{{.*}} -; SHADERTEST-DAG: call <4 x float> (...) @lgc.input.import.interpolated.v4f32{{.*}} +; SHADERTEST-DAG: call <3 x float> (...) @lgc.input.import.interpolated__v3f32{{.*}} +; SHADERTEST-DAG: call <4 x float> (...) @lgc.input.import.interpolated__v4f32{{.*}} +; SHADERTEST-DAG: call <4 x float> (...) @lgc.input.import.interpolated__v4f32{{.*}} ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST-COUNT-20: call float @llvm.amdgcn.interp.mov ; SHADERTEST: AMDLLPC SUCCESS diff --git a/llpc/test/shaderdb/object/ObjInput_TestFsVectorArray_lit.frag b/llpc/test/shaderdb/object/ObjInput_TestFsVectorArray_lit.frag index 6c674d4636..2c51efffd8 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestFsVectorArray_lit.frag +++ b/llpc/test/shaderdb/object/ObjInput_TestFsVectorArray_lit.frag @@ -18,7 +18,7 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results -; SHADERTEST-COUNT-2: call <4 x float> (...) 
@lgc.input.import.interpolated.v4f32{{.*}} +; SHADERTEST-COUNT-2: call <4 x float> (...) @lgc.input.import.interpolated__v4f32{{.*}} ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST-COUNT-8: call float @llvm.amdgcn.interp.mov ; SHADERTEST: AMDLLPC SUCCESS diff --git a/llpc/test/shaderdb/object/ObjInput_TestGsBasic_lit.geom b/llpc/test/shaderdb/object/ObjInput_TestGsBasic_lit.geom index f881379d62..f202bc005b 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestGsBasic_lit.geom +++ b/llpc/test/shaderdb/object/ObjInput_TestGsBasic_lit.geom @@ -24,8 +24,8 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results -; SHADERTEST-COUNT-3: call float @lgc.input.import.generic.f32{{.*}} -; SHADERTEST-COUNT-3: call <4 x double> @lgc.input.import.generic.v4f64{{.*}} +; SHADERTEST-COUNT-3: call float @lgc.input.import.generic__f32{{.*}} +; SHADERTEST-COUNT-3: call <4 x double> @lgc.input.import.generic__v4f64{{.*}} ; SHADERTEST: AMDLLPC SUCCESS */ // END_SHADERTEST diff --git a/llpc/test/shaderdb/object/ObjInput_TestGsCompSpecifier_lit.geom b/llpc/test/shaderdb/object/ObjInput_TestGsCompSpecifier_lit.geom index 31f5ccf2ef..84097bdf3a 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestGsCompSpecifier_lit.geom +++ b/llpc/test/shaderdb/object/ObjInput_TestGsCompSpecifier_lit.geom @@ -22,12 +22,12 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results -; SHADERTEST: call float @lgc.input.import.generic.f32(i1 false, i32 0, i32 0, i32 0, i32 0) -; SHADERTEST: call float @lgc.input.import.generic.f32(i1 false, i32 0, i32 0, i32 0, i32 1) -; SHADERTEST: call float @lgc.input.import.generic.f32(i1 false, i32 0, i32 0, i32 0, i32 2) -; SHADERTEST: call <3 x float> 
@lgc.input.import.generic.v3f32(i1 false, i32 0, i32 0, i32 1, i32 0) -; SHADERTEST: call <3 x float> @lgc.input.import.generic.v3f32(i1 false, i32 0, i32 0, i32 1, i32 1) -; SHADERTEST: call <3 x float> @lgc.input.import.generic.v3f32(i1 false, i32 0, i32 0, i32 1, i32 2) +; SHADERTEST: call float @lgc.input.import.generic__f32(i1 false, i32 0, i32 0, i32 0, i32 0) +; SHADERTEST: call float @lgc.input.import.generic__f32(i1 false, i32 0, i32 0, i32 0, i32 1) +; SHADERTEST: call float @lgc.input.import.generic__f32(i1 false, i32 0, i32 0, i32 0, i32 2) +; SHADERTEST: call <3 x float> @lgc.input.import.generic__v3f32(i1 false, i32 0, i32 0, i32 1, i32 0) +; SHADERTEST: call <3 x float> @lgc.input.import.generic__v3f32(i1 false, i32 0, i32 0, i32 1, i32 1) +; SHADERTEST: call <3 x float> @lgc.input.import.generic__v3f32(i1 false, i32 0, i32 0, i32 1, i32 2) ; SHADERTEST: AMDLLPC SUCCESS */ // END_SHADERTEST diff --git a/llpc/test/shaderdb/object/ObjInput_TestIndexingInterpOfInputArray_lit.frag b/llpc/test/shaderdb/object/ObjInput_TestIndexingInterpOfInputArray_lit.frag index d3366a5bbd..dd62358058 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestIndexingInterpOfInputArray_lit.frag +++ b/llpc/test/shaderdb/object/ObjInput_TestIndexingInterpOfInputArray_lit.frag @@ -57,7 +57,7 @@ void main() ; SHADERTEST: = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %{{.*}}, i32 238, i32 15, i32 15, i1 true) ; SHADERTEST: = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %{{.*}}, i32 68, i32 15, i32 15, i1 true) ; SHADERTEST: call {{.*}}float @llvm.amdgcn.wqm.f32 -; SHADERTEST: = call <4 x float> (...) @lgc.input.import.interpolated.v4f32(i1 false, i32 4, i32 0, i32 0, i32 poison, i32 0, <2 x float> +; SHADERTEST: = call <4 x float> (...) 
@lgc.input.import.interpolated__v4f32(i1 false, i32 4, i32 0, i32 0, i32 poison, i32 0, <2 x float> ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST: call i32 @llvm.amdgcn.mov.dpp.i32 ; SHADERTEST: call i32 @llvm.amdgcn.mov.dpp.i32 diff --git a/llpc/test/shaderdb/object/ObjInput_TestTcsBasic_lit.tesc b/llpc/test/shaderdb/object/ObjInput_TestTcsBasic_lit.tesc index fe45e60628..a957e6757f 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestTcsBasic_lit.tesc +++ b/llpc/test/shaderdb/object/ObjInput_TestTcsBasic_lit.tesc @@ -17,9 +17,9 @@ void main (void) ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results ; SHADERTEST: call i32 @lgc.input.import.builtin.InvocationId{{.*}} -; SHADERTEST: call float @lgc.input.import.generic.f32{{.*}} +; SHADERTEST: call float @lgc.input.import.generic__f32{{.*}} ; SHADERTEST: call i32 @lgc.input.import.builtin.InvocationId{{.*}} -; SHADERTEST: call double @lgc.input.import.generic.f64{{.*}} +; SHADERTEST: call double @lgc.input.import.generic__f64{{.*}} ; SHADERTEST: AMDLLPC SUCCESS */ // END_SHADERTEST diff --git a/llpc/test/shaderdb/object/ObjInput_TestTcsCompSpecifier_lit.tesc b/llpc/test/shaderdb/object/ObjInput_TestTcsCompSpecifier_lit.tesc index cca99c825c..bf39f8e45b 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestTcsCompSpecifier_lit.tesc +++ b/llpc/test/shaderdb/object/ObjInput_TestTcsCompSpecifier_lit.tesc @@ -15,8 +15,8 @@ void main(void) ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results -; SHADERTEST: call float @lgc.input.import.generic.f32(i1 false, i32 0, i32 0, i32 2, i32 % -; SHADERTEST: call <2 x float> @lgc.input.import.generic.v2f32(i1 false, i32 0, i32 0, i32 0, i32 % +; SHADERTEST: call float @lgc.input.import.generic__f32(i1 false, i32 0, i32 0, i32 2, i32 % +; SHADERTEST: 
call <2 x float> @lgc.input.import.generic__v2f32(i1 false, i32 0, i32 0, i32 0, i32 % ; SHADERTEST: AMDLLPC SUCCESS */ // END_SHADERTEST diff --git a/llpc/test/shaderdb/object/ObjInput_TestTcsLoadEntireInputArray_lit.spvasm b/llpc/test/shaderdb/object/ObjInput_TestTcsLoadEntireInputArray_lit.spvasm index 7342d81ace..2d33266762 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestTcsLoadEntireInputArray_lit.spvasm +++ b/llpc/test/shaderdb/object/ObjInput_TestTcsLoadEntireInputArray_lit.spvasm @@ -3,7 +3,7 @@ ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results -; SHADERTEST-COUNT-6: call <3 x float> @lgc.input.import.generic.v3f32{{.*}} +; SHADERTEST-COUNT-6: call <3 x float> @lgc.input.import.generic__v3f32{{.*}} ; SHADERTEST: AMDLLPC SUCCESS ; END_SHADERTEST diff --git a/llpc/test/shaderdb/object/ObjInput_TestTesBasic_lit.tese b/llpc/test/shaderdb/object/ObjInput_TestTesBasic_lit.tese index 2a22e3f26e..b5bd89a05b 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestTesBasic_lit.tese +++ b/llpc/test/shaderdb/object/ObjInput_TestTesBasic_lit.tese @@ -14,9 +14,9 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results -; SHADERTEST: call <4 x float> @lgc.input.import.generic.v4f32{{.*}} +; SHADERTEST: call <4 x float> @lgc.input.import.generic__v4f32{{.*}} ; SHADERTEST: call i32 @lgc.input.import.builtin.PrimitiveId{{.*}} -; SHADERTEST: call double @lgc.input.import.generic.f64{{.*}} +; SHADERTEST: call double @lgc.input.import.generic__f64{{.*}} ; SHADERTEST: AMDLLPC SUCCESS */ // END_SHADERTEST diff --git a/llpc/test/shaderdb/object/ObjInput_TestTesCompSpecifier_lit.tese b/llpc/test/shaderdb/object/ObjInput_TestTesCompSpecifier_lit.tese index 68756ea1a7..92c4356219 100644 --- 
a/llpc/test/shaderdb/object/ObjInput_TestTesCompSpecifier_lit.tese +++ b/llpc/test/shaderdb/object/ObjInput_TestTesCompSpecifier_lit.tese @@ -16,8 +16,8 @@ void main(void) ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results -; SHADERTEST: call <2 x float> @lgc.input.import.generic.v2f32(i1 true, i32 0, i32 0, i32 0, i32 poison) -; SHADERTEST: call float @lgc.input.import.generic.f32(i1 true, i32 0, i32 0, i32 2, i32 poison) +; SHADERTEST: call <2 x float> @lgc.input.import.generic__v2f32(i1 true, i32 0, i32 0, i32 0, i32 poison) +; SHADERTEST: call float @lgc.input.import.generic__f32(i1 true, i32 0, i32 0, i32 2, i32 poison) ; SHADERTEST: AMDLLPC SUCCESS */ // END_SHADERTEST diff --git a/llpc/test/shaderdb/object/ObjInput_TestTesComplexInBlock_lit.tese b/llpc/test/shaderdb/object/ObjInput_TestTesComplexInBlock_lit.tese index a87e32ee2a..4dce2bd4a8 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestTesComplexInBlock_lit.tese +++ b/llpc/test/shaderdb/object/ObjInput_TestTesComplexInBlock_lit.tese @@ -37,9 +37,9 @@ void main(void) ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results -; SHADERTEST-DAG: call float @lgc.input.import.generic.f32{{.*}} +; SHADERTEST-DAG: call float @lgc.input.import.generic__f32{{.*}} ; SHADERTEST-DAG: call float @lgc.input.import.generic{{.*}} -; SHADERTEST-DAG: call <4 x float> @lgc.input.import.generic.v4f32{{.*}} +; SHADERTEST-DAG: call <4 x float> @lgc.input.import.generic__v4f32{{.*}} ; SHADERTEST: AMDLLPC SUCCESS */ // END_SHADERTEST diff --git a/llpc/test/shaderdb/object/ObjInput_TestTesComplexPatchInBlock_lit.tese b/llpc/test/shaderdb/object/ObjInput_TestTesComplexPatchInBlock_lit.tese index c0fb667ee1..f71f93b716 100644 --- 
a/llpc/test/shaderdb/object/ObjInput_TestTesComplexPatchInBlock_lit.tese +++ b/llpc/test/shaderdb/object/ObjInput_TestTesComplexPatchInBlock_lit.tese @@ -55,9 +55,9 @@ void main(void) ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results -; SHADERTEST-COUNT-1: call float @lgc.input.import.generic.f32{{.*}} +; SHADERTEST-COUNT-1: call float @lgc.input.import.generic__f32{{.*}} ; SHADERTEST-COUNT-1: call i32 @lgc.input.import.generic{{.*}} -; SHADERTEST-COUNT-4: call float @lgc.input.import.generic.f32{{.*}} +; SHADERTEST-COUNT-4: call float @lgc.input.import.generic__f32{{.*}} ; SHADERTEST: AMDLLPC SUCCESS */ // END_SHADERTEST diff --git a/llpc/test/shaderdb/object/ObjInput_TestVsBasic_lit.vert b/llpc/test/shaderdb/object/ObjInput_TestVsBasic_lit.vert index ea07404f83..25972abdd7 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestVsBasic_lit.vert +++ b/llpc/test/shaderdb/object/ObjInput_TestVsBasic_lit.vert @@ -17,9 +17,9 @@ void main() ; RUN: amdllpc -auto-layout-desc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results -; SHADERTEST-DAG: call <2 x i32> @lgc.input.import.generic.v2i32{{.*}} +; SHADERTEST-DAG: call <2 x i32> @lgc.input.import.generic__v2i32{{.*}} ; SHADERTEST-DAG: call i32 @lgc.input.import.generic{{.*}} -; SHADERTEST-DAG: call <4 x float> @lgc.input.import.generic.v4f32{{.*}} +; SHADERTEST-DAG: call <4 x float> @lgc.input.import.generic__v4f32{{.*}} ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST-COUNT-3: call {{.*}} @llvm.amdgcn.struct.tbuffer.load ; SHADERTEST: AMDLLPC SUCCESS diff --git a/llpc/test/shaderdb/object/ObjInput_TestVsCompSpecifier_lit.vert b/llpc/test/shaderdb/object/ObjInput_TestVsCompSpecifier_lit.vert index b2ac79141a..835b6b6901 100644 --- 
a/llpc/test/shaderdb/object/ObjInput_TestVsCompSpecifier_lit.vert +++ b/llpc/test/shaderdb/object/ObjInput_TestVsCompSpecifier_lit.vert @@ -15,8 +15,8 @@ void main() ; RUN: amdllpc -auto-layout-desc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results -; SHADERTEST: call <2 x float> @lgc.input.import.generic.v2f32(i1 false, i32 0, i32 0, i32 1, i32 poison) -; SHADERTEST: call float @lgc.input.import.generic.f32(i1 false, i32 0, i32 0, i32 0, i32 poison) +; SHADERTEST: call <2 x float> @lgc.input.import.generic__v2f32(i1 false, i32 0, i32 0, i32 1, i32 poison) +; SHADERTEST: call float @lgc.input.import.generic__f32(i1 false, i32 0, i32 0, i32 0, i32 poison) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST: call {{.*}} @llvm.amdgcn.struct.tbuffer.load ; SHADERTEST: AMDLLPC SUCCESS diff --git a/llpc/test/shaderdb/object/ObjInput_TestVsDouble_lit.vert b/llpc/test/shaderdb/object/ObjInput_TestVsDouble_lit.vert index 1286a9228c..a904d550f0 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestVsDouble_lit.vert +++ b/llpc/test/shaderdb/object/ObjInput_TestVsDouble_lit.vert @@ -22,13 +22,13 @@ void main() ; RUN: amdllpc -auto-layout-desc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results -; SHADERTEST-DAG: call <4 x double> @lgc.input.import.generic.v4f64{{.*}} -; SHADERTEST-DAG: call <4 x double> @lgc.input.import.generic.v4f64{{.*}} -; SHADERTEST-DAG: call <4 x double> @lgc.input.import.generic.v4f64{{.*}} -; SHADERTEST-DAG: call <4 x double> @lgc.input.import.generic.v4f64{{.*}} -; SHADERTEST-DAG: call <4 x double> @lgc.input.import.generic.v4f64{{.*}} -; SHADERTEST-DAG: call <3 x double> @lgc.input.import.generic.v3f64{{.*}} -; SHADERTEST-DAG: call <3 x double> @lgc.input.import.generic.v3f64{{.*}} +; 
SHADERTEST-DAG: call <4 x double> @lgc.input.import.generic__v4f64{{.*}} +; SHADERTEST-DAG: call <4 x double> @lgc.input.import.generic__v4f64{{.*}} +; SHADERTEST-DAG: call <4 x double> @lgc.input.import.generic__v4f64{{.*}} +; SHADERTEST-DAG: call <4 x double> @lgc.input.import.generic__v4f64{{.*}} +; SHADERTEST-DAG: call <4 x double> @lgc.input.import.generic__v4f64{{.*}} +; SHADERTEST-DAG: call <3 x double> @lgc.input.import.generic__v3f64{{.*}} +; SHADERTEST-DAG: call <3 x double> @lgc.input.import.generic__v3f64{{.*}} ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST-COUNT-12: call i32 @llvm.amdgcn.struct.tbuffer.load.i32 ; SHADERTEST: AMDLLPC SUCCESS diff --git a/llpc/test/shaderdb/object/ObjInput_TestVsVectorArray_lit.vert b/llpc/test/shaderdb/object/ObjInput_TestVsVectorArray_lit.vert index 6be814bcc5..f4847a543f 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestVsVectorArray_lit.vert +++ b/llpc/test/shaderdb/object/ObjInput_TestVsVectorArray_lit.vert @@ -16,7 +16,7 @@ void main() ; RUN: amdllpc -auto-layout-desc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results -; SHADERTEST-COUNT-2: call <4 x float> @lgc.input.import.generic.v4f32{{.*}} +; SHADERTEST-COUNT-2: call <4 x float> @lgc.input.import.generic__v4f32{{.*}} ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST-COUNT-4: call i32 @llvm.amdgcn.struct.tbuffer.load.i32 ; SHADERTEST: AMDLLPC SUCCESS diff --git a/llpc/test/shaderdb/object/ObjOutput_TestTcsBasic_lit.tesc b/llpc/test/shaderdb/object/ObjOutput_TestTcsBasic_lit.tesc index 823e90ef52..97fc92daa1 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestTcsBasic_lit.tesc +++ b/llpc/test/shaderdb/object/ObjOutput_TestTcsBasic_lit.tesc @@ -25,7 +25,7 @@ void main (void) ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results ; SHADERTEST: call void 
@lgc.output.export.generic{{.*}}v4f64 ; SHADERTEST: call void @lgc.output.export.generic{{.*}}f32 -; SHADERTEST: call double @lgc.output.import.generic.f64{{.*}} +; SHADERTEST: call double @lgc.output.import.generic__f64{{.*}} ; SHADERTEST: call void @lgc.output.export.generic{{.*}}f64 ; SHADERTEST: AMDLLPC SUCCESS */ diff --git a/llpc/test/shaderdb/object/ObjOutput_TestTcsConstExpr_lit.tesc b/llpc/test/shaderdb/object/ObjOutput_TestTcsConstExpr_lit.tesc index cf1c089737..14aaaf49c7 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestTcsConstExpr_lit.tesc +++ b/llpc/test/shaderdb/object/ObjOutput_TestTcsConstExpr_lit.tesc @@ -22,7 +22,7 @@ void main(void) ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results ; SHADERTEST: call void @lgc.output.export.generic{{.*}}f32(i32 0, i32 0, i32 0, i32 -1, float 0x3FE6666660000000) ; SHADERTEST: call void @lgc.output.export.generic{{.*}}f32(i32 0, i32 0, i32 1, i32 -1, float 4.500000e+00) -; SHADERTEST: call float @lgc.output.import.generic.f32{{.*}}(i1 true, i32 0, i32 0, i32 1, i32 poison) +; SHADERTEST: call float @lgc.output.import.generic__f32{{.*}}(i1 true, i32 0, i32 0, i32 1, i32 poison) ; SHADERTEST: call void @lgc.output.export.generic{{.*}}f32(i32 0, i32 0, i32 2, i32 -1, float %{{[0-9]*}}) ; SHADERTEST: AMDLLPC SUCCESS */ diff --git a/llpc/test/shaderdb/ray_tracing/PipelineRays_TestLaunchKernel.pipe b/llpc/test/shaderdb/ray_tracing/PipelineRays_TestLaunchKernel.pipe index 6ef532df23..733bbcb060 100644 --- a/llpc/test/shaderdb/ray_tracing/PipelineRays_TestLaunchKernel.pipe +++ b/llpc/test/shaderdb/ray_tracing/PipelineRays_TestLaunchKernel.pipe @@ -160,7 +160,7 @@ rtState.traceRayWaveDensityThreshold[10] = 1 rtState.traceRayWaveDensityThreshold[11] = 1 rtState.traceRayWaveDensityThreshold[12] = 1 rtState.gpurtFeatureFlags = 0 -rtState.gpurtShaderLibrary = Shader_0xE4BF4BB5EC6FAB41.spv +rtState.gpurtShaderLibrary = Shader_0xAC2A9C902883FD2A.spv rtState.gpurtFuncTable.pFunc[0] = TraceRay2_0 
rtState.gpurtFuncTable.pFunc[1] = TraceRayInline2_0 rtState.gpurtFuncTable.pFunc[2] = TraceRayUsingHitToken2_0 @@ -185,4 +185,4 @@ pipelineLibStageMask = 0 ; CHECK-LABEL: ; ModuleID = 'lgcPipeline' ; CHECK-NEXT: source_filename = "main" ; CHECK: define dllexport void @lgc.shader.CS.main() !lgc.shaderstage !{{[0-9]+}} !lgc.rt.shaderstage !{{[0-9]+}} { -; CHECK: call void (...) @lgc.cps.jump.cloned._cs_(i32 %{{[0-9]+}}, i32 -1, {} poison, i32 poison, i32 %{{[0-9]+}}, { <3 x i32>, i32 } %{{[0-9]+}}) +; CHECK: call void (...) @lgc.cps.jump(i32 %{{[0-9]+}}, i32 -1, {} poison, i32 poison, i32 %{{[0-9]+}}, { <3 x i32>, i32 } %{{[0-9]+}}) diff --git a/llpc/test/shaderdb/ray_tracing/Shader_0xAC2A9C902883FD2A.spv b/llpc/test/shaderdb/ray_tracing/Shader_0xAC2A9C902883FD2A.spv new file mode 100644 index 0000000000..2aec42d200 Binary files /dev/null and b/llpc/test/shaderdb/ray_tracing/Shader_0xAC2A9C902883FD2A.spv differ diff --git a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_MultipleConstData.pipe b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_MultipleConstData.pipe index 4bc93f27c3..07e271fdc0 100644 --- a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_MultipleConstData.pipe +++ b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_MultipleConstData.pipe @@ -1,6 +1,9 @@ // This test case checks that the elf linker places symbols for constant data at the correct offset with the correct size. 
; BEGIN_SHADERTEST ; RUN: amdllpc -enable-relocatable-shader-elf -o %t.elf %gfxip %s && llvm-objdump --triple=amdgcn --mcpu=gfx1010 -t %t.elf | FileCheck -check-prefix=SHADERTEST %s +// Due to stopping combining arbitrary offsets in PAL relocs, this test no longer generates the symbols being looked for +// Disabling the test for now, but may be worth re-instating (and updating) if the relocs implementation is updated +; REQUIRES: do-not-run-me ; SHADERTEST-LABEL: SYMBOL TABLE: ; SHADERTEST: 0000000000000020 l O .rodata.cst32 0000000000000020 __unnamed_1.vertex ; SHADERTEST: 0000000000000000 l O .rodata.cst32 0000000000000020 __unnamed_2.vertex diff --git a/llpc/translator/lib/SPIRV/SPIRVInternal.h b/llpc/translator/lib/SPIRV/SPIRVInternal.h index d06c530829..7da794f819 100644 --- a/llpc/translator/lib/SPIRV/SPIRVInternal.h +++ b/llpc/translator/lib/SPIRV/SPIRVInternal.h @@ -402,6 +402,8 @@ union ShaderInOutMetadata { uint64_t IsBlockArray : 1; // Whether we are handling block array uint64_t PerVertexDimension : 1; // Whether this is the per-vertex dimension (outermost) for an array uint64_t PerPrimitive : 1; // Whether this is a per-primitive output (mesh shader) + uint64_t NumComponents : 4; // Number of components for input/output scalars/vectors (NOTE: for 64-bit data + // types, each vector element or scalar is considered to occupy two components) }; uint64_t U64All[2]; }; diff --git a/llpc/translator/lib/SPIRV/SPIRVReader.cpp b/llpc/translator/lib/SPIRV/SPIRVReader.cpp index 409ebb52a0..d8f2e23b00 100644 --- a/llpc/translator/lib/SPIRV/SPIRVReader.cpp +++ b/llpc/translator/lib/SPIRV/SPIRVReader.cpp @@ -45,12 +45,12 @@ #include "SPIRVType.h" #include "SPIRVUtil.h" #include "SPIRVValue.h" -#include "continuations/ContinuationsUtil.h" #include "llpcCompiler.h" #include "llpcContext.h" #include "llpcDialect.h" #include "llpcPipelineContext.h" #include "llpcRayTracingContext.h" +#include "llvmraytracing/ContinuationsUtil.h" #include "lgc/LgcDialect.h" #include 
"lgc/LgcRtDialect.h" #include "lgc/Pipeline.h" @@ -1263,10 +1263,10 @@ Value *SPIRVToLLVM::transConvertInst(SPIRVValue *bv, Function *f, BasicBlock *bb auto dstType = transType(bc->getType()); CastInst::CastOps co = Instruction::BitCast; - lgc::Builder::CooperativeMatrixElementType srcElemTy = lgc::Builder::CooperativeMatrixElementType::Unknown; - lgc::Builder::CooperativeMatrixElementType dstElemTy = lgc::Builder::CooperativeMatrixElementType::Unknown; - lgc::Builder::CooperativeMatrixLayout srcLayout = lgc::Builder::CooperativeMatrixLayout::InvalidLayout; - lgc::Builder::CooperativeMatrixLayout dstLayout = lgc::Builder::CooperativeMatrixLayout::InvalidLayout; + lgc::CooperativeMatrixElementType srcElemTy = lgc::CooperativeMatrixElementType::Unknown; + lgc::CooperativeMatrixElementType dstElemTy = lgc::CooperativeMatrixElementType::Unknown; + lgc::CooperativeMatrixLayout srcLayout = lgc::CooperativeMatrixLayout::InvalidLayout; + lgc::CooperativeMatrixLayout dstLayout = lgc::CooperativeMatrixLayout::InvalidLayout; if (bv->getType()->isTypeCooperativeMatrixKHR()) { auto srcCompType = static_cast(bc->getOperand(0)->getType()) @@ -2828,8 +2828,7 @@ template <> Value *SPIRVToLLVM::transValueWithOpcode(SPIRVValue *const s case OpTypeSampledImage: return transLoadImage(spvLoad->getSrc()); case OpTypeAccelerationStructureKHR: { - auto pipelineContext = (static_cast(m_context))->getPipelineContext(); - if (pipelineContext->getRayTracingState()->forceInvalidAccelStruct) { + if (getPipelineContext()->getRayTracingState()->forceInvalidAccelStruct) { // Always return invalid AS address (0x0, 0x0) if the option is set. 
auto loadType = cast_or_null(getPointeeType(spvLoad->getSrc())); assert(loadType && (loadType->getNumElements() == 2)); @@ -3506,8 +3505,8 @@ SmallVector SPIRVToLLVM::transAccessChain(SPIRVValue *const spvValue) { unsigned columns = spvAccessType->getCooperativeMatrixKHRColumns(); spvAccessType = spvAccessType->getCooperativeMatrixKHRComponentType(); basePointeeType = transType(spvAccessType); - lgc::BuilderCommon::CooperativeMatrixElementType elemType = mapToBasicType(spvAccessType); - lgc::BuilderCommon::CooperativeMatrixLayout layout = + lgc::CooperativeMatrixElementType elemType = mapToBasicType(spvAccessType); + lgc::CooperativeMatrixLayout layout = getCooperativeMatrixKHRLayout(static_cast(use), elemType, rows, columns); std::string mangledName(LlpcName::SpirvCooperativeMatrixProxy); @@ -4134,11 +4133,8 @@ template <> Value *SPIRVToLLVM::transValueWithOpcode(m_context); - auto *pipelineContext = static_cast(llpcContext->getPipelineContext()); - pipelineContext->setIndirectPipeline(); - } + if (m_execModule != ExecutionModelRayGenerationKHR) + getRaytracingContext()->setIndirectPipeline(); SPIRVInstruction *const spvInst = static_cast(spvValue); std::vector spvOperands = spvInst->getOperands(); @@ -4161,8 +4157,13 @@ Value *SPIRV::SPIRVToLLVM::createTraceRayDialectOp(SPIRVValue *const spvValue) { Type *payloadTy = transType(spvOperands[10]->getType()->getPointerElementType()); auto paq = getPaqFromSize(getBuilder()->getContext(), alignTo(m_m->getDataLayout().getTypeAllocSize(payloadTy), 4)); - return getBuilder()->create(accelStructAsI64, rayFlags, cullMask, sbtOffset, sbtStride, missIndex, + CallInst *call = nullptr; + call = getBuilder()->create(accelStructAsI64, rayFlags, cullMask, sbtOffset, sbtStride, missIndex, rayOrigin, rayTMin, rayDir, rayTMax, payload, paq); + + // Store a poison value as metadata to track payload data type. 
+ ContHelper::setPayloadTypeMetadata(call, payloadTy); + return call; } // ===================================================================================================================== @@ -4170,11 +4171,9 @@ Value *SPIRV::SPIRVToLLVM::createTraceRayDialectOp(SPIRVValue *const spvValue) { // // @param spvValue : A SPIR-V value. template <> Value *SPIRVToLLVM::transValueWithOpcode(SPIRVValue *const spvValue) { - if (m_execModule == ExecutionModelCallableKHR) { - Llpc::Context *llpcContext = static_cast(m_context); - auto *pipelineContext = static_cast(llpcContext->getPipelineContext()); - pipelineContext->setIndirectPipeline(); - } + if (m_execModule == ExecutionModelCallableKHR) + getRaytracingContext()->setIndirectPipeline(); + SPIRVInstruction *const spvInst = static_cast(spvValue); std::vector spvOperands = spvInst->getOperands(); @@ -4185,7 +4184,10 @@ template <> Value *SPIRVToLLVM::transValueWithOpcode(SPIRV Type *callableDataTy = transType(spvOperands[1]->getType()->getPointerElementType()); unsigned dataByteSize = alignTo(m_m->getDataLayout().getTypeAllocSize(callableDataTy), 4); - return getBuilder()->create(shaderIndex, callableData, dataByteSize); + auto *call = getBuilder()->create(shaderIndex, callableData, dataByteSize); + // Store a poison value as metadata to track callable data type. 
+ ContHelper::setPayloadTypeMetadata(call, callableDataTy); + return call; } // ===================================================================================================================== @@ -4237,12 +4239,20 @@ template <> Value *SPIRVToLLVM::transValueWithOpcode(SP Value *const hitT = transValue(spvOperands[0], func, block); Value *const hitKind = transValue(spvOperands[1], func, block); - // We don't have attribute for this Op in SPIR-V - // TODO: Pass the pointer of hit attribute global variable here after SpirvLowerRayTracing is moved after - // SpirvLowerGlobal - auto dummyPtr = ConstantPointerNull::get(PointerType::get(getBuilder()->getContext(), SPIRAS_Private)); - - return getBuilder()->create(hitT, hitKind, dummyPtr, 0); + auto *hitAttribute = m_m->getGlobalVariable("HitAttributeKHR0"); + + // Pass the size of the hit attribute specific to this report.hit call. + unsigned hitAttrBytes = 0; + Value *hitAttrPtr = hitAttribute; + if (hitAttribute) + hitAttrBytes = m_m->getDataLayout().getTypeAllocSize(hitAttribute->getValueType()); + else + hitAttrPtr = ConstantPointerNull::get(PointerType::get(getBuilder()->getContext(), SPIRAS_Private)); + unsigned hitAttrDwords = divideCeil(hitAttrBytes, 4); + auto *call = getBuilder()->create(hitT, hitKind, hitAttrPtr, hitAttrBytes); + // Store a poison value as metadata to track hit attribute data type. 
+ ContHelper::setPayloadTypeMetadata(call, ArrayType::get(m_builder->getInt32Ty(), hitAttrDwords)); + return call; } // ===================================================================================================================== @@ -4714,8 +4724,7 @@ template <> Value *SPIRVToLLVM::transValueWithOpcode(SPIRVValue *cons // @param bb : Which basicblock to generate code Value *SPIRVToLLVM::transDebugPrintf(SPIRVInstruction *bi, const ArrayRef spvValues, Function *func, BasicBlock *bb) { - auto pipelineContext = (static_cast(m_context))->getPipelineContext(); - auto resMapping = pipelineContext->getResourceMapping(); + auto resMapping = getPipelineContext()->getResourceMapping(); unsigned nodeIndex = 0; if (findResourceNode(resMapping->pUserDataNodes, resMapping->userDataNodeCount, Vkgc::InternalDescriptorSetId, Vkgc::PrintfBufferBindingId, &nodeIndex) == nullptr) @@ -4960,8 +4969,8 @@ Value *SPIRVToLLVM::transVariable(SPIRVValue *const spvValue) { BasicBlock *bb = getBuilder()->GetInsertBlock(); assert(bb->isEntryBlock()); getBuilder()->SetInsertPoint(bb, bb->getFirstInsertionPt()); - - Value *const var = getBuilder()->CreateAlloca(varType, nullptr, spvVar->getName()); + auto allocAddr = m_m->getDataLayout().getAllocaAddrSpace(); + Value *const var = getBuilder()->CreateAlloca(varType, allocAddr, nullptr, spvVar->getName()); getBuilder()->restoreIP(insertPoint); @@ -4978,18 +4987,18 @@ Value *SPIRVToLLVM::transVariable(SPIRVValue *const spvValue) { unsigned loc = 0; spvVar->hasDecorate(DecorationLocation, 0, &loc); varName = SPIRVStorageClassNameMap::map(storageClass) + std::to_string(loc); - Llpc::Context *llpcContext = static_cast(m_context); + Llpc::PipelineContext *pipelineContext = getPipelineContext(); if (storageClass == StorageClassRayPayloadKHR || storageClass == StorageClassIncomingRayPayloadKHR) { - llpcContext->getPipelineContext()->collectPayloadSize(varType, m_m->getDataLayout()); + pipelineContext->collectPayloadSize(varType, 
m_m->getDataLayout()); } if (storageClass == StorageClassCallableDataKHR || storageClass == StorageClassIncomingCallableDataKHR) { - llpcContext->getPipelineContext()->collectCallableDataSize(varType, m_m->getDataLayout()); + pipelineContext->collectCallableDataSize(varType, m_m->getDataLayout()); } if (storageClass == StorageClassHitAttributeKHR) - llpcContext->getPipelineContext()->collectAttributeDataSize(varType, m_m->getDataLayout()); + pipelineContext->collectAttributeDataSize(varType, m_m->getDataLayout()); } GlobalVariable *const globalVar = @@ -5053,8 +5062,8 @@ template <> Value *SPIRVToLLVM::transValueWithOpcode(SPIRVV SPIRVType *elemSpvType = spvOperands[0]->getType()->getCooperativeMatrixKHRComponentType(); unsigned rows = spvOperands[0]->getType()->getCooperativeMatrixKHRRows(); unsigned columns = spvOperands[0]->getType()->getCooperativeMatrixKHRColumns(); - lgc::Builder::CooperativeMatrixElementType elemType = mapToBasicType(elemSpvType); - lgc::Builder::CooperativeMatrixLayout layout = getCooperativeMatrixKHRLayout( + lgc::CooperativeMatrixElementType elemType = mapToBasicType(elemSpvType); + lgc::CooperativeMatrixLayout layout = getCooperativeMatrixKHRLayout( static_cast(spvOperands[0]->getType()->getCooperativeMatrixKHRUse()), elemType, rows, columns); return getBuilder()->CreateCoopMatrixTimesScalar(matrix, scalar, elemType, layout); @@ -5164,57 +5173,55 @@ Value *SPIRVToLLVM::transString(const SPIRVString *spvValue) { // For integer types, arbitrary signedness combinations are supported for the // A/B matrices.C/D matrices are always signed. 
-lgc::Builder::CooperativeMatrixElementType SPIRVToLLVM::mapToBasicType(Type *const elemType) { - lgc::Builder::CooperativeMatrixElementType basicTy = lgc::Builder::CooperativeMatrixElementType::Unknown; +lgc::CooperativeMatrixElementType SPIRVToLLVM::mapToBasicType(Type *const elemType) { + lgc::CooperativeMatrixElementType basicTy = lgc::CooperativeMatrixElementType::Unknown; if (elemType->isIntegerTy(8)) { - basicTy = lgc::Builder::CooperativeMatrixElementType::Int8; + basicTy = lgc::CooperativeMatrixElementType::Int8; } else if (elemType->isIntegerTy(16)) { - basicTy = lgc::Builder::CooperativeMatrixElementType::Int16; + basicTy = lgc::CooperativeMatrixElementType::Int16; } else if (elemType->isIntegerTy(32)) { - basicTy = lgc::Builder::CooperativeMatrixElementType::Int32; + basicTy = lgc::CooperativeMatrixElementType::Int32; } else if (elemType->isFloatTy()) { - basicTy = lgc::Builder::CooperativeMatrixElementType::Float32; + basicTy = lgc::CooperativeMatrixElementType::Float32; } else if (elemType->isHalfTy()) { - basicTy = lgc::Builder::CooperativeMatrixElementType::Float16; + basicTy = lgc::CooperativeMatrixElementType::Float16; } else { llvm_unreachable("The element type is not supported!"); } return basicTy; } -lgc::Builder::CooperativeMatrixElementType SPIRVToLLVM::mapToBasicType(SPIRVType *const elemType) { - lgc::Builder::CooperativeMatrixElementType basicTy = lgc::Builder::CooperativeMatrixElementType::Unknown; +lgc::CooperativeMatrixElementType SPIRVToLLVM::mapToBasicType(SPIRVType *const elemType) { + lgc::CooperativeMatrixElementType basicTy = lgc::CooperativeMatrixElementType::Unknown; if (elemType->isTypeInt(8)) { - basicTy = lgc::Builder::CooperativeMatrixElementType::Int8; + basicTy = lgc::CooperativeMatrixElementType::Int8; } else if (elemType->isTypeInt(16)) { - basicTy = lgc::Builder::CooperativeMatrixElementType::Int16; + basicTy = lgc::CooperativeMatrixElementType::Int16; } else if (elemType->isTypeInt(32)) { - basicTy = 
lgc::Builder::CooperativeMatrixElementType::Int32; + basicTy = lgc::CooperativeMatrixElementType::Int32; } else if (elemType->isTypeFloat(32)) { - basicTy = lgc::Builder::CooperativeMatrixElementType::Float32; + basicTy = lgc::CooperativeMatrixElementType::Float32; } else if (elemType->isTypeFloat(16)) { - basicTy = lgc::Builder::CooperativeMatrixElementType::Float16; + basicTy = lgc::CooperativeMatrixElementType::Float16; } else { llvm_unreachable("The element type is not supported!"); } return basicTy; } -lgc::Builder::CooperativeMatrixLayout SPIRVToLLVM::getLayout(lgc::Builder::CooperativeMatrixElementType elemType) { - const Vkgc::GfxIpVersion gfxIp = static_cast(m_context)->getPipelineContext()->getGfxIpVersion(); - if (elemType == lgc::Builder::CooperativeMatrixElementType::Int32 || - elemType == lgc::Builder::CooperativeMatrixElementType::Float32) { +lgc::CooperativeMatrixLayout SPIRVToLLVM::getLayout(lgc::CooperativeMatrixElementType elemType) { + const Vkgc::GfxIpVersion gfxIp = getPipelineContext()->getGfxIpVersion(); + if (elemType == lgc::CooperativeMatrixElementType::Int32 || elemType == lgc::CooperativeMatrixElementType::Float32) { if (gfxIp.major == 11) - return lgc::Builder::CooperativeMatrixLayout::AccumulatorMatrixLayout; - return lgc::Builder::CooperativeMatrixLayout::Gfx10AccumulatorMatrixLayout; + return lgc::CooperativeMatrixLayout::AccumulatorMatrixLayout; + return lgc::CooperativeMatrixLayout::Gfx10AccumulatorMatrixLayout; } - if (elemType == lgc::Builder::CooperativeMatrixElementType::Int16 || - elemType == lgc::Builder::CooperativeMatrixElementType::Int8 || - elemType == lgc::Builder::CooperativeMatrixElementType::Float16) { - return lgc::Builder::CooperativeMatrixLayout::FactorMatrixLayout; + if (elemType == lgc::CooperativeMatrixElementType::Int16 || elemType == lgc::CooperativeMatrixElementType::Int8 || + elemType == lgc::CooperativeMatrixElementType::Float16) { + return lgc::CooperativeMatrixLayout::FactorMatrixLayout; } 
llvm_unreachable("The element type is not supported!"); - return lgc::Builder::CooperativeMatrixLayout::InvalidLayout; + return lgc::CooperativeMatrixLayout::InvalidLayout; } // ===================================================================================================================== @@ -5223,27 +5230,26 @@ lgc::Builder::CooperativeMatrixLayout SPIRVToLLVM::getLayout(lgc::Builder::Coope // @param elemType : The type for the CooperativeMatrix element. // @param rows: The size of the row for the CooperativeMatrix. // @param columns: The size of the column for the CooperativeMatrix. -lgc::Builder::CooperativeMatrixLayout SPIRVToLLVM::getCooperativeMatrixKHRLayout( - CooperativeMatrixUse use, lgc::Builder::CooperativeMatrixElementType elemType, unsigned rows, unsigned columns) { - const Vkgc::GfxIpVersion gfxIp = static_cast(m_context)->getPipelineContext()->getGfxIpVersion(); +lgc::CooperativeMatrixLayout SPIRVToLLVM::getCooperativeMatrixKHRLayout(CooperativeMatrixUse use, + lgc::CooperativeMatrixElementType elemType, + unsigned rows, unsigned columns) { + const Vkgc::GfxIpVersion gfxIp = getPipelineContext()->getGfxIpVersion(); if (use == CooperativeMatrixUse::CooperativeMatrixUseMatrixAKHR || use == CooperativeMatrixUse::CooperativeMatrixUseMatrixBKHR) { - return lgc::Builder::CooperativeMatrixLayout::FactorMatrixLayout; + return lgc::CooperativeMatrixLayout::FactorMatrixLayout; } if (use == CooperativeMatrixUse::CooperativeMatrixUseMatrixAccumulatorKHR) { if (gfxIp.major == 11) - return lgc::Builder::CooperativeMatrixLayout::AccumulatorMatrixLayout; - if (elemType == lgc::Builder::CooperativeMatrixElementType::Float32 || - elemType == lgc::Builder::CooperativeMatrixElementType::Int32) - return lgc::Builder::CooperativeMatrixLayout::Gfx10AccumulatorMatrixLayout; - if (elemType == lgc::Builder::CooperativeMatrixElementType::Int16 || - elemType == lgc::Builder::CooperativeMatrixElementType::Float16) - return 
lgc::Builder::CooperativeMatrixLayout::Gfx10Accumulator16bitMatrixLayout; + return lgc::CooperativeMatrixLayout::AccumulatorMatrixLayout; + if (elemType == lgc::CooperativeMatrixElementType::Float32 || elemType == lgc::CooperativeMatrixElementType::Int32) + return lgc::CooperativeMatrixLayout::Gfx10AccumulatorMatrixLayout; + if (elemType == lgc::CooperativeMatrixElementType::Int16 || elemType == lgc::CooperativeMatrixElementType::Float16) + return lgc::CooperativeMatrixLayout::Gfx10Accumulator16bitMatrixLayout; llvm_unreachable("Invalid element type!"); - return lgc::Builder::CooperativeMatrixLayout::InvalidLayout; + return lgc::CooperativeMatrixLayout::InvalidLayout; } llvm_unreachable("The element type is not supported!"); - return lgc::Builder::CooperativeMatrixLayout::InvalidLayout; + return lgc::CooperativeMatrixLayout::InvalidLayout; } // ===================================================================================================================== @@ -5330,15 +5336,23 @@ template <> Value *SPIRVToLLVM::transValueWithOpcode CooperativeMatrixUse use = static_cast(coopMatLoad->getType()->getCooperativeMatrixKHRUse()); unsigned rows = static_cast(coopMatLoad->getType()->getCooperativeMatrixKHRRows()); unsigned columns = static_cast(coopMatLoad->getType()->getCooperativeMatrixKHRColumns()); + lgc::CooperativeMatrixElementType elemType = mapToBasicType(elemSpvType); if (use == CooperativeMatrixUse::CooperativeMatrixUseMatrixAKHR) { // Layout A is the transposition of the layout B, col_major_A = row_majow_B. // FactorMatrixLayout is for B, so it needs inverse the layout when use is A. 
isColMajor = !isColMajor; } - lgc::Builder::CooperativeMatrixElementType elemType = mapToBasicType(elemSpvType); - lgc::Builder::CooperativeMatrixLayout layout = getCooperativeMatrixKHRLayout(use, elemType, rows, columns); - auto CoopMatLoadInst = - getBuilder()->CreateCooperativeMatrixLoad(pointer, stride, isColMajor, elemType, layout, memoryAccess); + // For OpCooperativeMatrixLoadKHR and OpCooperativeMatrixStoreKHR instructions, the Pointer and Stride operands + // must be aligned to at least the lesser of 16 bytes or the natural alignment of a row or column + // (depending on ColumnMajor) of the matrix (where the natural alignment is the number of columns/rows multiplied + // by the component size). + Type *elementllType = getBuilder()->transCooperativeMatrixElementType(elemType); + unsigned elementSize = static_cast(m_m->getDataLayout().getTypeSizeInBits(elementllType) / 8); + unsigned alignmentInRowCol = (isColMajor ? rows : columns) * elementSize; + unsigned loadAlignment = std::min((unsigned)16, alignmentInRowCol); + lgc::CooperativeMatrixLayout layout = getCooperativeMatrixKHRLayout(use, elemType, rows, columns); + auto CoopMatLoadInst = getBuilder()->CreateCooperativeMatrixLoad(pointer, stride, isColMajor, elemType, layout, + memoryAccess, Align(loadAlignment)); return CoopMatLoadInst; } @@ -5402,7 +5416,7 @@ template <> Value *SPIRVToLLVM::transValueWithOpcodegetObject()->getType()->getCooperativeMatrixKHRComponentType()); - lgc::Builder::CooperativeMatrixElementType elemType = mapToBasicType(elemltType); + lgc::CooperativeMatrixElementType elemType = mapToBasicType(elemltType); CooperativeMatrixUse use = static_cast(coopMatStore->getObject()->getType()->getCooperativeMatrixKHRUse()); @@ -5414,7 +5428,7 @@ template <> Value *SPIRVToLLVM::transValueWithOpcode Value *SPIRVToLLVM::transValueWithOpcodeCreateCooperativeMatrixStore(pointer, matrix, stride, isColMajor, elemType, layout, memoryAccess); + // For OpCooperativeMatrixLoadKHR and 
OpCooperativeMatrixStoreKHR instructions, the Pointer and Stride operands + // must be aligned to at least the lesser of 16 bytes or the natural alignment of a row or column + // (depending on ColumnMajor) of the matrix (where the natural alignment is the number of columns/rows multiplied + // by the component size). + unsigned elementSize = static_cast(m_m->getDataLayout().getTypeSizeInBits(elemltType) / 8); + unsigned alignmentInRowCol = (isColMajor ? rows : columns) * elementSize; + unsigned storeAlignment = std::min((unsigned)16, alignmentInRowCol); + getBuilder()->CreateCooperativeMatrixStore(pointer, matrix, stride, isColMajor, elemType, layout, memoryAccess, + Align(storeAlignment)); return nullptr; } @@ -5445,8 +5467,8 @@ template <> Value *SPIRVToLLVM::transValueWithOpcodegetType()->getCooperativeMatrixKHRComponentType(); SPIRVType *elemTypeC = spvOperands[2]->getType()->getCooperativeMatrixKHRComponentType(); - lgc::Builder::CooperativeMatrixElementType elemBasicTypeA = mapToBasicType(elemTypeA); - lgc::Builder::CooperativeMatrixElementType elemBasicTypeC = mapToBasicType(elemTypeC); + lgc::CooperativeMatrixElementType elemBasicTypeA = mapToBasicType(elemTypeA); + lgc::CooperativeMatrixElementType elemBasicTypeC = mapToBasicType(elemTypeC); bool isSignedA = static_cast(static_cast(spvInst)->getMatrixASigned()); bool isSignedB = static_cast(static_cast(spvInst)->getMatrixBSigned()); @@ -6133,15 +6155,15 @@ SmallVector SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *bv, Fu CastInst::CastOps co = Instruction::BitCast; if (bv->getType()->isTypeCooperativeMatrixKHR()) { SPIRVType *dstType = bc->getType()->getCooperativeMatrixKHRComponentType(); - lgc::Builder::CooperativeMatrixElementType basicDstElemTy = mapToBasicType(dstType); + lgc::CooperativeMatrixElementType basicDstElemTy = mapToBasicType(dstType); SPIRVType *srcType = bc->getOperand(0)->getType()->getCooperativeMatrixKHRComponentType(); bool isExt = dstType->getBitWidth() > 
srcType->getBitWidth(); co = isExt ? Instruction::FPExt : Instruction::FPTrunc; - lgc::Builder::CooperativeMatrixElementType basicSrcElemTy = mapToBasicType(srcType); - lgc::Builder::CooperativeMatrixLayout srcLayout = getCooperativeMatrixKHRLayout( + lgc::CooperativeMatrixElementType basicSrcElemTy = mapToBasicType(srcType); + lgc::CooperativeMatrixLayout srcLayout = getCooperativeMatrixKHRLayout( static_cast(bc->getType()->getCooperativeMatrixKHRUse()), basicSrcElemTy, bc->getType()->getCooperativeMatrixKHRRows(), bc->getType()->getCooperativeMatrixKHRColumns()); - lgc::Builder::CooperativeMatrixLayout dstLayout = getCooperativeMatrixKHRLayout( + lgc::CooperativeMatrixLayout dstLayout = getCooperativeMatrixKHRLayout( static_cast(bc->getType()->getCooperativeMatrixKHRUse()), basicDstElemTy, bc->getType()->getCooperativeMatrixKHRRows(), bc->getType()->getCooperativeMatrixKHRColumns()); return mapValue(bv, getBuilder()->CreateCooperativeMatrixConvert(co, val, basicSrcElemTy, basicDstElemTy, @@ -6325,7 +6347,7 @@ SmallVector SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *bv, Fu auto llpcContext = static_cast(m_context); if (llpcContext->getPipelineType() == PipelineType::Graphics) { auto buildInfo = static_cast(llpcContext->getPipelineBuildInfo()); - if (buildInfo->originUpperLeft) + if (buildInfo->getGlState().originUpperLeft) dpdy = getBuilder()->CreateFNeg(dpdy); } @@ -8187,7 +8209,7 @@ bool SPIRVToLLVM::translate(ExecutionModel entryExecModel, const char *entryName shaderMode.useSubgroupSize = m_moduleUsage->useSubgroupSize; // Shader modes contain also data for other modules (subgroup size usage), so query it in the pipeline context. 
- auto pipelineContext = (static_cast(m_context))->getPipelineContext(); + auto pipelineContext = getPipelineContext(); unsigned subgroupSizeUsage = pipelineContext->getSubgroupSizeUsage(); // NOTE: setCommonShaderMode() supports the graphics and compute stage, does not support raytracing stage shaderMode.useSubgroupSize = @@ -9118,6 +9140,10 @@ Constant *SPIRVToLLVM::buildShaderInOutMetadata(SPIRVType *bt, ShaderInOutDecora inOutMd.IsBuiltIn = false; inOutMd.Value = inOutDec.Value.Loc; inOutMd.Index = inOutDec.Index; + // Record the number of components for vector inputs/outputs + inOutMd.NumComponents = bt->isTypeVector() ? bt->getVectorComponentCount() : 1; + if (bt->isTypeScalar() || bt->isTypeVector()) + inOutMd.NumComponents *= (bt->getBitWidth() == 64 ? 2 : 1); } inOutMd.Component = inOutDec.Component; @@ -10330,15 +10356,23 @@ llvm::Function *SPIRVToLLVM::createLibraryEntryFunc() { return func; } -const Vkgc::PipelineOptions *SPIRVToLLVM::getPipelineOptions() const { +PipelineContext *SPIRVToLLVM::getPipelineContext() const { assert(m_context && "Invalid context!"); - return static_cast(m_context)->getPipelineContext()->getPipelineOptions(); + return static_cast(m_context)->getPipelineContext(); +} + +const Vkgc::PipelineOptions *SPIRVToLLVM::getPipelineOptions() const { + return getPipelineContext()->getPipelineOptions(); +} + +RayTracingContext *SPIRVToLLVM::getRaytracingContext() const { + return static_cast(getPipelineContext()); } bool SPIRVToLLVM::scratchBoundsChecksEnabled() const { assert(m_context && "Invalid context!"); - const Vkgc::GfxIpVersion gfxIp = static_cast(m_context)->getPipelineContext()->getGfxIpVersion(); + const Vkgc::GfxIpVersion gfxIp = getPipelineContext()->getGfxIpVersion(); return gfxIp.major >= 9 || getPipelineOptions()->enableScratchAccessBoundsChecks; } @@ -10576,7 +10610,7 @@ void SPIRVToLLVM::insertScratchBoundsChecks(SPIRVValue *memOp, const ScratchBoun void SPIRVToLLVM::createXfbMetadata(bool hasXfbOuts) { auto 
llpcContext = static_cast(m_context); auto pipelineBuildInfo = static_cast(llpcContext->getPipelineBuildInfo()); - bool needXfbMetadata = hasXfbOuts && !pipelineBuildInfo->apiXfbOutData.forceDisableStreamOut; + bool needXfbMetadata = hasXfbOuts && !pipelineBuildInfo->getGlState().apiXfbOutData.forceDisableStreamOut; #if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 70 needXfbMetadata |= pipelineBuildInfo->apiXfbOutData.forceEnablePrimStats; #endif @@ -10595,8 +10629,8 @@ void SPIRVToLLVM::createXfbMetadata(bool hasXfbOuts) { InvalidValue, 0, // xfbBuffer[1] -> InvalidValue, 0, // xfbBuffer[2] -> InvalidValue, 0}; // xfbBuffer[3] -> - if (hasXfbOuts && !pipelineBuildInfo->apiXfbOutData.forceDisableStreamOut) { - const bool useXfbDecorations = pipelineBuildInfo->apiXfbOutData.numXfbOutInfo == 0; + if (hasXfbOuts && !pipelineBuildInfo->getGlState().apiXfbOutData.forceDisableStreamOut) { + const bool useXfbDecorations = pipelineBuildInfo->getGlState().apiXfbOutData.numXfbOutInfo == 0; if (useXfbDecorations) { for (unsigned i = 0, e = m_bm->getNumVariables(); i != e; ++i) { auto bv = m_bm->getVariable(i); @@ -10692,8 +10726,8 @@ void SPIRVToLLVM::createXfbMetadata(bool hasXfbOuts) { } } } else { - for (unsigned idx = 0; idx < pipelineBuildInfo->apiXfbOutData.numXfbOutInfo; ++idx) { - const auto &xfbInfo = pipelineBuildInfo->apiXfbOutData.pXfbOutInfos[idx]; + for (unsigned idx = 0; idx < pipelineBuildInfo->getGlState().apiXfbOutData.numXfbOutInfo; ++idx) { + const auto &xfbInfo = pipelineBuildInfo->getGlState().apiXfbOutData.pXfbOutInfos[idx]; const unsigned indexOfBuffer = 2 * xfbInfo.xfbBuffer; xfbState[indexOfBuffer] = xfbInfo.streamId; xfbState[indexOfBuffer + 1] = xfbInfo.xfbStride; @@ -10715,63 +10749,63 @@ void SPIRVToLLVM::createXfbMetadata(bool hasXfbOuts) { Value *SPIRVToLLVM::transCooperativeMatrixArithInst(SPIRVValue *spvVal, BasicBlock *bb) { auto oc = spvVal->getOpCode(); Function *func = bb->getParent(); - Builder::CooperativeMatrixArithOp arithOp; + 
CooperativeMatrixArithOp arithOp; switch (oc) { case OpFNegate: - arithOp = Builder::CooperativeMatrixArithOp::FSub; + arithOp = CooperativeMatrixArithOp::FSub; break; case OpSNegate: - arithOp = Builder::CooperativeMatrixArithOp::ISub; + arithOp = CooperativeMatrixArithOp::ISub; break; case OpFAdd: - arithOp = Builder::CooperativeMatrixArithOp::FAdd; + arithOp = CooperativeMatrixArithOp::FAdd; break; case OpIAdd: - arithOp = Builder::CooperativeMatrixArithOp::IAdd; + arithOp = CooperativeMatrixArithOp::IAdd; break; case OpISub: - arithOp = Builder::CooperativeMatrixArithOp::ISub; + arithOp = CooperativeMatrixArithOp::ISub; break; case OpFSub: - arithOp = Builder::CooperativeMatrixArithOp::FSub; + arithOp = CooperativeMatrixArithOp::FSub; break; case OpIMul: - arithOp = Builder::CooperativeMatrixArithOp::IMul; + arithOp = CooperativeMatrixArithOp::IMul; break; case OpFMul: - arithOp = Builder::CooperativeMatrixArithOp::FMul; + arithOp = CooperativeMatrixArithOp::FMul; break; case OpFDiv: - arithOp = Builder::CooperativeMatrixArithOp::FDiv; + arithOp = CooperativeMatrixArithOp::FDiv; break; case OpSDiv: - arithOp = Builder::CooperativeMatrixArithOp::SDiv; + arithOp = CooperativeMatrixArithOp::SDiv; break; case OpUDiv: - arithOp = Builder::CooperativeMatrixArithOp::UDiv; + arithOp = CooperativeMatrixArithOp::UDiv; break; case OpFMod: - arithOp = Builder::CooperativeMatrixArithOp::FMod; + arithOp = CooperativeMatrixArithOp::FMod; break; case OpSMod: - arithOp = Builder::CooperativeMatrixArithOp::SMod; + arithOp = CooperativeMatrixArithOp::SMod; break; case OpUMod: - arithOp = Builder::CooperativeMatrixArithOp::UMod; + arithOp = CooperativeMatrixArithOp::UMod; break; case OpSRem: - arithOp = Builder::CooperativeMatrixArithOp::SRem; + arithOp = CooperativeMatrixArithOp::SRem; break; case OpFRem: - arithOp = Builder::CooperativeMatrixArithOp::FRem; + arithOp = CooperativeMatrixArithOp::FRem; break; default: llvm_unreachable("Not support arithmetic for cooperative 
matrix"); return nullptr; } - lgc::Builder::CooperativeMatrixLayout layout = lgc::Builder::CooperativeMatrixLayout::InvalidLayout; - lgc::Builder::CooperativeMatrixElementType elemType = lgc::Builder::CooperativeMatrixElementType::Unknown; + lgc::CooperativeMatrixLayout layout = lgc::CooperativeMatrixLayout::InvalidLayout; + lgc::CooperativeMatrixElementType elemType = lgc::CooperativeMatrixElementType::Unknown; if (oc == OpFNegate || oc == OpSNegate) { auto unary = static_cast(spvVal); Value *srcVal = transValue(unary->getOperand(0), func, bb); @@ -10807,9 +10841,8 @@ Value *SPIRVToLLVM::transCooperativeMatrixArithInst(SPIRVValue *spvVal, BasicBlo // Translate cooperative matrix construction instructions to LLVM IR Value *SPIRVToLLVM::transCooperativeMatrixKHRFromConstruct(SPIRVType *spvCoopMatTy, const std::vector &constituents) { - lgc::Builder::CooperativeMatrixElementType elemType = - mapToBasicType(spvCoopMatTy->getCooperativeMatrixKHRComponentType()); - lgc::Builder::CooperativeMatrixLayout layout = getCooperativeMatrixKHRLayout( + lgc::CooperativeMatrixElementType elemType = mapToBasicType(spvCoopMatTy->getCooperativeMatrixKHRComponentType()); + lgc::CooperativeMatrixLayout layout = getCooperativeMatrixKHRLayout( static_cast(spvCoopMatTy->getCooperativeMatrixKHRUse()), elemType, spvCoopMatTy->getCooperativeMatrixKHRRows(), spvCoopMatTy->getCooperativeMatrixKHRColumns()); return getBuilder()->CreateCooperativeMatrixFill(constituents[0], elemType, layout); diff --git a/llpc/translator/lib/SPIRV/SPIRVReader.h b/llpc/translator/lib/SPIRV/SPIRVReader.h index 4e8ea3d63c..85d3085c01 100644 --- a/llpc/translator/lib/SPIRV/SPIRVReader.h +++ b/llpc/translator/lib/SPIRV/SPIRVReader.h @@ -63,6 +63,11 @@ class Value; } // namespace llvm using namespace llvm; +namespace Llpc { +class PipelineContext; +class RayTracingContext; +} // namespace Llpc + namespace SPIRV { class SPIRVLoopMerge; class SPIRVToLLVMDbgTran; @@ -297,12 +302,12 @@ class SPIRVToLLVM { SmallVector 
llvmInstructions; }; - lgc::Builder::CooperativeMatrixElementType mapToBasicType(Type *const ltType); - lgc::Builder::CooperativeMatrixElementType mapToBasicType(SPIRVType *const spvType); - lgc::Builder::CooperativeMatrixLayout getLayout(lgc::Builder::CooperativeMatrixElementType elemTy); - lgc::Builder::CooperativeMatrixLayout getCooperativeMatrixKHRLayout(CooperativeMatrixUse use, - lgc::Builder::CooperativeMatrixElementType elemTy, - unsigned rows, unsigned columns); + lgc::CooperativeMatrixElementType mapToBasicType(Type *const ltType); + lgc::CooperativeMatrixElementType mapToBasicType(SPIRVType *const spvType); + lgc::CooperativeMatrixLayout getLayout(lgc::CooperativeMatrixElementType elemTy); + lgc::CooperativeMatrixLayout getCooperativeMatrixKHRLayout(CooperativeMatrixUse use, + lgc::CooperativeMatrixElementType elemTy, unsigned rows, + unsigned columns); enum CooperativeMatrixMemoryAccess { CooperativeMatrixMemoryAccessNone = 0x00, @@ -432,12 +437,24 @@ class SPIRVToLLVM { Value *createTraceRayDialectOp(SPIRVValue *const spvValue); + // ======================================================================================================================== + // Wrapper method for easier access to the pipeline context. + // @returns : Pointer to the pipeline context of the current LLPC context. + // ======================================================================================================================== + Llpc::PipelineContext *getPipelineContext() const; + // ======================================================================================================================== // Wrapper method for easier access to pipeline options. // @returns : Pointer to the pipeline options of the current LLPC context. 
// ======================================================================================================================== const Vkgc::PipelineOptions *getPipelineOptions() const; + // ======================================================================================================================== + // Wrapper method for easier access to the raytracing context. + // @returns : Pointer to the pipeline context of the current raytracing context. + // ======================================================================================================================== + Llpc::RayTracingContext *getRaytracingContext() const; + // ======================================================================================================================== // Helper method for checking if the scratch out of bounds check was enabled. // @returns : Whether the check is enabled or not. diff --git a/llpc/util/llpcShaderModuleHelper.cpp b/llpc/util/llpcShaderModuleHelper.cpp index d83ccb1082..cdb1aee408 100644 --- a/llpc/util/llpcShaderModuleHelper.cpp +++ b/llpc/util/llpcShaderModuleHelper.cpp @@ -69,6 +69,7 @@ ShaderModuleUsage ShaderModuleHelper::getShaderModuleUsageInfo(const BinaryData ShaderModuleUsage shaderModuleUsage = {}; // Parse SPIR-V instructions std::unordered_set capabilities; + bool hasIndexDecoration = false; while (codePos < end) { unsigned opCode = (codePos[0] & OpCodeMask); @@ -179,6 +180,8 @@ ShaderModuleUsage ShaderModuleHelper::getShaderModuleUsageInfo(const BinaryData shaderModuleUsage.useBackSecondaryColor = true; } else if (decoration == DecorationPerVertexKHR) { shaderModuleUsage.useBarycentric = true; + } else if (decoration == DecorationIndex) { + hasIndexDecoration = true; } break; } @@ -210,6 +213,10 @@ ShaderModuleUsage ShaderModuleHelper::getShaderModuleUsageInfo(const BinaryData codePos += wordCount; } + // Without any DecorationIndex, it needs to disableDualSource + if (hasIndexDecoration == false) + shaderModuleUsage.disableDualSource = 
true; + if (capabilities.find(CapabilityVariablePointersStorageBuffer) != capabilities.end()) shaderModuleUsage.enableVarPtrStorageBuf = true; diff --git a/llpc/util/llpcUtil.cpp b/llpc/util/llpcUtil.cpp index 645ed0d7e2..a7281d7302 100644 --- a/llpc/util/llpcUtil.cpp +++ b/llpc/util/llpcUtil.cpp @@ -305,10 +305,10 @@ Vkgc::UniformConstantMapEntry *getUniformConstantEntryByLocation(const Llpc::Con if (context->getPipelineType() == PipelineType::Graphics) { auto *buildInfo = static_cast(context->getPipelineBuildInfo()); // Find the uniform constant map to use. - for (unsigned s = 0; s < buildInfo->numUniformConstantMaps; s++) { - if (buildInfo->ppUniformMaps[s] != nullptr && - isShaderStageInMask(stage, buildInfo->ppUniformMaps[s]->visibility)) { - accessedUniformMap = buildInfo->ppUniformMaps[s]; + for (unsigned s = 0; s < buildInfo->getGlState().numUniformConstantMaps; s++) { + if (buildInfo->getGlState().ppUniformMaps[s] != nullptr && + isShaderStageInMask(stage, buildInfo->getGlState().ppUniformMaps[s]->visibility)) { + accessedUniformMap = buildInfo->getGlState().ppUniformMaps[s]; break; } } diff --git a/shared/.clang-format b/llvmraytracing/.clang-format similarity index 100% rename from shared/.clang-format rename to llvmraytracing/.clang-format diff --git a/shared/continuations/.clang-tidy b/llvmraytracing/.clang-tidy similarity index 100% rename from shared/continuations/.clang-tidy rename to llvmraytracing/.clang-tidy diff --git a/shared/continuations/.gitignore b/llvmraytracing/.gitignore similarity index 100% rename from shared/continuations/.gitignore rename to llvmraytracing/.gitignore diff --git a/llvmraytracing/CMakeLists.txt b/llvmraytracing/CMakeLists.txt new file mode 100644 index 0000000000..780f763a20 --- /dev/null +++ b/llvmraytracing/CMakeLists.txt @@ -0,0 +1,112 @@ +cmake_minimum_required(VERSION 3.13.4) + +project(LlvmRaytracing LANGUAGES CXX) + +function(set_compiler_options PROJECT_NAME) + # Output with color if in terminal: 
https://github.com/ninja-build/ninja/wiki/FAQ + if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + target_compile_options("${PROJECT_NAME}" PRIVATE -fdiagnostics-color=always) + elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang") + target_compile_options("${PROJECT_NAME}" PRIVATE -fcolor-diagnostics) + endif() +endfunction() + +option(LLVMRAYTRACING_BUILD_TESTS "Build raytracing tests") + +add_llvm_library(LLVMRaytracing + lib/CleanupContinuations.cpp + lib/Continuations.cpp + lib/ContinuationsDialect.cpp + lib/CpsStackLowering.cpp + lib/DXILContIntrinsicPrepare.cpp + lib/DXILContLgcRtOpConverter.cpp + lib/DXILContPostProcess.cpp + lib/DXILSupport.cpp + lib/GpurtContext.cpp + lib/GpurtDialect.cpp + lib/LegacyCleanupContinuations.cpp + lib/LgcCpsDialect.cpp + lib/LgcRtDialect.cpp + lib/LgcRtqDialect.cpp + lib/LowerAwait.cpp + lib/LowerRaytracingPipeline.cpp + lib/PassRegistry.inc + lib/PayloadAccessQualifiers.cpp + lib/RegisterBuffer.cpp + lib/RemoveTypesMetadata.cpp + lib/TypesMetadata.cpp + + DEPENDS + intrinsics_gen + + LINK_COMPONENTS + Analysis + Core + Coroutines + IPO + Scalar + Support + TransformUtils +) + +target_include_directories(LLVMRaytracing PUBLIC + $ + $ + $ +) + +llvm_map_components_to_libnames(extra_llvm_libs CompilerUtils) + +target_link_libraries(LLVMRaytracing PUBLIC llvm_dialects ${extra_llvm_libs} llpc_version) +set_compiler_options(LLVMRaytracing) + +# TableGen for dialects +if (EXISTS ${LLVM_TOOLS_BINARY_PATH}/llvm-dialects-tblgen) + set(RAYTRACING_TABLEGEN_EXE ${LLVM_TOOLS_BINARY_PATH}/llvm-dialects-tblgen) +else() + set(RAYTRACING_TABLEGEN_EXE $) +endif() +set(RAYTRACING_TABLEGEN_TARGET llvm-dialects-tblgen) + +macro(raytracing_tablegen DIALECTNAME FILE OUTPUT_FILENAME) + set(LLVM_TARGET_DEFINITIONS "${FILE}") + set(TBLGEN_TARGET "${OUTPUT_FILENAME}TableGen") + + tablegen(RAYTRACING "${OUTPUT_FILENAME}.h.inc" -gen-dialect-decls --dialect "${DIALECTNAME}" "${RAYTRACING_TABLEGEN_DEFINES}" + EXTRA_INCLUDES 
${CMAKE_CURRENT_SOURCE_DIR}/../imported/llvm-dialects/include) + tablegen(RAYTRACING "${OUTPUT_FILENAME}.cpp.inc" -gen-dialect-defs --dialect "${DIALECTNAME}" "${RAYTRACING_TABLEGEN_DEFINES}" + EXTRA_INCLUDES ${CMAKE_CURRENT_SOURCE_DIR}/../imported/llvm-dialects/include) + add_public_tablegen_target(${TBLGEN_TARGET}) + + add_dependencies(LLVMRaytracing ${TBLGEN_TARGET}) + target_sources(LLVMRaytracing PRIVATE ${FILE}) +endmacro() + +raytracing_tablegen(continuations include/llvmraytracing/ContinuationsDialect.td ContinuationsDialect) +raytracing_tablegen(lgc.cps include/lgc/LgcCpsDialect.td LgcCpsDialect) +raytracing_tablegen(lgc.rt include/lgc/LgcRtDialect.td LgcRtDialect) +raytracing_tablegen(lgc.rtq include/lgc/LgcRtqDialect.td LgcRtqDialect) +raytracing_tablegen(lgc.gpurt include/lgc/GpurtDialect.td GpurtDialect) + +target_compile_features(LLVMRaytracing PUBLIC cxx_std_17) +set_target_properties(LLVMRaytracing PROPERTIES CXX_EXTENSIONS OFF) + +add_subdirectory(plugin) + +if(CONTINUATIONS_BUILD_TESTS) + message(WARNING "Deprecated flag CONTINUATIONS_BUILD_TEST used; use LLVMRAYTRACING_BUILD_TESTS instead") + set(LLVMRAYTRACING_BUILD_TESTS ON) +endif() +if(LLVMRAYTRACING_BUILD_TESTS) + add_subdirectory(test) + add_subdirectory(unittests) + + # Temporary aliases -- to be removed when Vulkan CI and DXCP have been updated. + add_custom_target(check-continuations DEPENDS check-llvmraytracing) + add_custom_target(check-continuations-units DEPENDS check-llvmraytracing-units) +endif() + +# Temporary alias -- to be removed when Vulkan CI and DXCP have been updated. 
+if (LLPC_RAYTRACING_ADD_TRANSITION_TARGETS) + add_library(LLVMContinuations ALIAS LLVMRaytracing) +endif() diff --git a/shared/continuations/README.md b/llvmraytracing/README.md similarity index 100% rename from shared/continuations/README.md rename to llvmraytracing/README.md diff --git a/llvmraytracing/include/continuations/Continuations.h b/llvmraytracing/include/continuations/Continuations.h new file mode 100644 index 0000000000..1e137767d8 --- /dev/null +++ b/llvmraytracing/include/continuations/Continuations.h @@ -0,0 +1,2 @@ +// Transition header -- to be removed +#include "llvmraytracing/Continuations.h" diff --git a/llvmraytracing/include/continuations/ContinuationsDialect.h b/llvmraytracing/include/continuations/ContinuationsDialect.h new file mode 100644 index 0000000000..7e3bb63cf4 --- /dev/null +++ b/llvmraytracing/include/continuations/ContinuationsDialect.h @@ -0,0 +1,2 @@ +// Transition header -- to be removed +#include "llvmraytracing/ContinuationsDialect.h" diff --git a/llvmraytracing/include/continuations/ContinuationsUtil.h b/llvmraytracing/include/continuations/ContinuationsUtil.h new file mode 100644 index 0000000000..c346b5f5ae --- /dev/null +++ b/llvmraytracing/include/continuations/ContinuationsUtil.h @@ -0,0 +1,2 @@ +// Transition header -- to be removed +#include "llvmraytracing/ContinuationsUtil.h" diff --git a/shared/continuations/include/lgc/GpurtDialect.h b/llvmraytracing/include/lgc/GpurtDialect.h similarity index 100% rename from shared/continuations/include/lgc/GpurtDialect.h rename to llvmraytracing/include/lgc/GpurtDialect.h diff --git a/shared/continuations/include/lgc/GpurtDialect.td b/llvmraytracing/include/lgc/GpurtDialect.td similarity index 97% rename from shared/continuations/include/lgc/GpurtDialect.td rename to llvmraytracing/include/lgc/GpurtDialect.td index 9ac6fd4a40..20ef2a410a 100644 --- a/shared/continuations/include/lgc/GpurtDialect.td +++ b/llvmraytracing/include/lgc/GpurtDialect.td @@ -314,3 +314,9 @@ def 
GpurtGetRayStaticIdOp : GpurtOp<"get.ray.static.id", [Memory<[(read Inaccess let results = (outs I32:$result); let summary = "get current ray static ID"; } + +def GpurtContinuationStackIsGlobalOp : GpurtOp<"continuation.stack.is.global", [Memory<[]>, WillReturn]> { + let arguments = (ins); + let results = (outs I1:$result); + let summary = "Check whether continuation stack is global"; +} diff --git a/shared/continuations/include/lgc/LgcCpsDialect.h b/llvmraytracing/include/lgc/LgcCpsDialect.h similarity index 87% rename from shared/continuations/include/lgc/LgcCpsDialect.h rename to llvmraytracing/include/lgc/LgcCpsDialect.h index d276e3c9d3..bbd764c215 100644 --- a/shared/continuations/include/lgc/LgcCpsDialect.h +++ b/llvmraytracing/include/lgc/LgcCpsDialect.h @@ -25,6 +25,7 @@ #pragma once +#include "llvm-dialects/Dialect/Builder.h" #include "llvm/ADT/ArrayRef.h" #include @@ -33,9 +34,11 @@ #include "LgcCpsDialect.h.inc" namespace llvm { -class Type; -class Function; +class AllocaInst; class DataLayout; +class Function; +class Type; +class Value; } // namespace llvm namespace lgc::cps { @@ -73,4 +76,9 @@ void setCpsFunctionLevel(llvm::Function &fn, CpsLevel level); CpsLevel getCpsLevelFromFunction(const llvm::Function &fn); CpsLevel getCpsLevelForShaderStage(CpsShaderStage stage); uint8_t getPotentialCpsReturnLevels(CpsShaderStage stage); +void pushStateToCpsStack(llvm_dialects::Builder &builder, + lgc::cps::JumpOp &jumpOp); +llvm::Value *popStateFromCpsStack(llvm_dialects::Builder &builder, + const llvm::DataLayout &DL, + llvm::Type *stateType); } // namespace lgc::cps diff --git a/shared/continuations/include/lgc/LgcCpsDialect.td b/llvmraytracing/include/lgc/LgcCpsDialect.td similarity index 100% rename from shared/continuations/include/lgc/LgcCpsDialect.td rename to llvmraytracing/include/lgc/LgcCpsDialect.td diff --git a/shared/continuations/include/lgc/LgcRtDialect.h b/llvmraytracing/include/lgc/LgcRtDialect.h similarity index 100% rename from 
shared/continuations/include/lgc/LgcRtDialect.h rename to llvmraytracing/include/lgc/LgcRtDialect.h diff --git a/shared/continuations/include/lgc/LgcRtDialect.td b/llvmraytracing/include/lgc/LgcRtDialect.td similarity index 100% rename from shared/continuations/include/lgc/LgcRtDialect.td rename to llvmraytracing/include/lgc/LgcRtDialect.td diff --git a/llvmraytracing/include/lgc/LgcRtqDialect.h b/llvmraytracing/include/lgc/LgcRtqDialect.h new file mode 100644 index 0000000000..1c342c18f7 --- /dev/null +++ b/llvmraytracing/include/lgc/LgcRtqDialect.h @@ -0,0 +1,43 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ + +// Declarations for the lgc.rtq dialect + +#pragma once + +#include + +#define GET_INCLUDES +#define GET_DIALECT_DECLS +#include "LgcRtqDialect.h.inc" + +namespace lgc { +namespace rtq { + +llvm::Type *getRayQueryType(llvm::LLVMContext &C); +bool isRayQueryType(llvm::Type *Ty); + +} // namespace rtq +} // namespace lgc diff --git a/llvmraytracing/include/lgc/LgcRtqDialect.td b/llvmraytracing/include/lgc/LgcRtqDialect.td new file mode 100644 index 0000000000..0b0df99a28 --- /dev/null +++ b/llvmraytracing/include/lgc/LgcRtqDialect.td @@ -0,0 +1,372 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ + +// lgc.rtq dialect: a dialect to represent ray query objects and operations on them +// +// Ray query objects are allocated via `alloca` in the usual address space for function-local variables. For now, +// they are represented as an i127. All ops operate on pointers to ray query objects, which is why we currently don't +// define a ray query type in TableGen; but there are C++ helpers. +// +// TODO: Use a TargetExt-type once some LLVM prerequisites are upstream. + +include "llvm-dialects/Dialect/Dialect.td" + +def LgcRtqDialect : Dialect { + let name = "lgc.rtq"; + let cppNamespace = "lgc::rtq"; +} + +def V2F32 : TgConstant<(FixedVectorType F32, 2)>, Type; +def V3F32 : TgConstant<(FixedVectorType F32, 3)>, Type; +def A4V3F32 : TgConstant<(ArrayType V3F32, 4)>, Type; + +class LgcRtqOp traits_ = []> + : Op; + +// ===================================================================================================================== +def InitializeOp : LgcRtqOp<"initialize", [Memory<[(readwrite ArgMem)]>]> { + let arguments = + (ins PointerType:$ray_query, I64:$acceleration_structure, I32:$ray_flags, + I32:$instance_inclusion_mask, V3F32:$ray_origin, F32:$t_min, V3F32:$direction, F32:$t_max); + let results = (outs); + + let summary = "initialize a ray query object for traversal"; + let description = [{ + Initialize a ray query object. Any previous traversal state is discarded. + }]; +} + +// ===================================================================================================================== +def TerminateOp : LgcRtqOp<"terminate", [Memory<[(readwrite ArgMem)]>]> { + let arguments = (ins PointerType:$ray_query); + let results = (outs); + + let summary = "terminate the current traversal"; + let description = [{ + Behavior is undefined if there is no traversal in progress, e.g. because the previous traversal has completed. 
+ }]; +} + +// ===================================================================================================================== +def ProceedOp : LgcRtqOp<"proceed", [Memory<[(readwrite ArgMem)]>]> { + let arguments = (ins PointerType:$ray_query); + let results = (outs I1:$traversal_incomplete); + + let summary = "proceed with the current traversal"; + let description = [{ + Proceed with the current traversal until a candidate intersection is found or the traversal is complete. + + Returns true if a candidate intersection was found and false if the traversal is complete. + + Behavior is undefined if no traversal is in progress, e.g. because the previous traversal has completed. + }]; +} + +// ===================================================================================================================== +def IntersectionCommitAabbOp : LgcRtqOp<"intersection.commit.aabb", [Memory<[(readwrite ArgMem)]>]> { + let arguments = (ins PointerType:$ray_query, F32:$t_hit); + let results = (outs); + + let summary = "commit an AABB intersection"; + let description = [{ + Set the committed intersection to the current candidate AABB intersection with the given `t_hit` value. + + `t_hit` must satisfy `t_min <= t_hit <= t_max`, where `t_max` is the T value of the current committed intersection, + if any, or the initial `t_max` value passed into `initialize` otherwise. + + The most recent use of `proceed` on the ray query object must have returned true, and the current candidate must + be of AABB type, otherwise behavior is undefined. 
+ }]; +} + +// ===================================================================================================================== +def IntersectionCommitTriangleOp : LgcRtqOp<"intersection.commit.triangle", [Memory<[(readwrite ArgMem)]>]> { + let arguments = (ins PointerType:$ray_query); + let results = (outs); + + let summary = "commit the current candidate triangle intersection"; + let description = [{ + Sets the committed intersection to the current candidate triangle intersection. + + The most recent use of `proceed` on the ray query object must have returned true, and the current candidate must + be of triangle type, otherwise behavior is undefined. + }]; +} + +// ===================================================================================================================== +def IntersectionTypeOp : LgcRtqOp<"intersection.type", [Memory<[(read ArgMem)]>]> { + let arguments = (ins PointerType:$ray_query, AttrI1:$committed); + let results = (outs I32:$result); + + let summary = "return the type of the current candidate intersection"; + let description = [{ + Returns the type of a currently recorded intersection, depending on `committed`: + + - if `committed` is false, returns 0 for candidate triangles and 1 for candidate AABB + - if `committed` is true, returns 0 for none, 1 for committed triangles, and 2 for committed AABB + + If `committed` is false, the most recent use of `proceed` must have returned true, or behavior is undefined. 
+ }]; +} + +// ===================================================================================================================== +def RayTMinOp : LgcRtqOp<"ray.tmin", [Memory<[(read ArgMem)]>]> { + let arguments = (ins PointerType:$ray_query); + let results = (outs F32:$result); + + let summary = "return the Tmin value"; +} + +// ===================================================================================================================== +def RayFlagsOp : LgcRtqOp<"ray.flags", [Memory<[(read ArgMem)]>]> { + let arguments = (ins PointerType:$ray_query); + let results = (outs I32:$result); + + let summary = "return the ray flags"; +} + +// ===================================================================================================================== +def IntersectionTOp : LgcRtqOp<"intersection.t", [Memory<[(read ArgMem)]>]> { + let arguments = (ins PointerType:$ray_query, AttrI1:$committed); + let results = (outs F32:$result); + + let summary = "return the T value of an intersection"; + let description = [{ + Returns the T value of a currently recorded intersection, depending on `committed`. + + If `committed` is false and the most recent use of `proceed` returned false or the current candidate is not of + triangle type, behavior is undefined. + + If `committed` is true, behavior is undefined if there is no currently committed intersection. + }]; +} + +// ===================================================================================================================== +def IntersectionInstanceIdOp : LgcRtqOp<"intersection.instance.id", [Memory<[(read ArgMem)]>]> { + let arguments = (ins PointerType:$ray_query, AttrI1:$committed); + let results = (outs I32:$result); + + let summary = "return the instance ID of an intersection"; + let description = [{ + Returns the instance ID of a currently recorded intersection, depending on `committed`. 
+ + The instance ID is the user-provided instance ID on the bottom-level acceleration structure instance within the + top-level structure. + + If `committed` is false and the most recent use of `proceed` returned false, behavior is undefined. + + If `committed` is true, behavior is undefined if there is no currently committed intersection. + }]; +} + +// ===================================================================================================================== +def IntersectionInstanceIndexOp : LgcRtqOp<"intersection.instance.index", [Memory<[(read ArgMem)]>]> { + let arguments = (ins PointerType:$ray_query, AttrI1:$committed); + let results = (outs I32:$result); + + let summary = "return the instance index of an intersection"; + let description = [{ + Returns the instance index of a currently recorded intersection, depending on `committed`. + + The instance index is the autogenerated index of the current instance in the top-level structure. + + If `committed` is false and the most recent use of `proceed` returned false, behavior is undefined. + + If `committed` is true, behavior is undefined if there is no currently committed intersection. + }]; +} + +// ===================================================================================================================== +def IntersectionContributionToHitGroupIndexOp : LgcRtqOp<"intersection.contribution.to.hit.group.index", + [Memory<[(read ArgMem)]>]> { + let arguments = (ins PointerType:$ray_query, AttrI1:$committed); + let results = (outs I32:$result); + + let summary = "return the contribution to hit group index of an intersection"; + let description = [{ + Returns the contribution to hit group index of a currently recorded intersection, depending on `committed`. + + If `committed` is false, behavior is undefined if the most recent use of `proceed` returned false. + + If `committed` is true, behavior is undefined if there is no currently committed intersection. 
+ }]; +} + +// ===================================================================================================================== +def IntersectionGeometryIndexOp : LgcRtqOp<"intersection.geometry.index", [Memory<[(read ArgMem)]>]> { + let arguments = (ins PointerType:$ray_query, AttrI1:$committed); + let results = (outs I32:$result); + + let summary = "return the geometry index of an intersection"; + let description = [{ + Returns the geometry index of a currently recorded intersection, depending on `committed`. + + If `committed` is false, behavior is undefined if the most recent use of `proceed` returned false. + + If `committed` is true, behavior is undefined if there is no currently committed intersection. + }]; +} + +// ===================================================================================================================== +def IntersectionPrimitiveIndexOp : LgcRtqOp<"intersection.primitive.index", [Memory<[(read ArgMem)]>]> { + let arguments = (ins PointerType:$ray_query, AttrI1:$committed); + let results = (outs I32:$result); + + let summary = "return the primitive index of an intersection"; + let description = [{ + Returns the primitive index of a currently recorded intersection, depending on `committed`. + + If `committed` is false, behavior is undefined if the most recent use of `proceed` returned false. + + If `committed` is true, behavior is undefined if there is no currently committed intersection. + }]; +} + +// ===================================================================================================================== +def IntersectionBarycentricsOp : LgcRtqOp<"intersection.barycentrics", [Memory<[(read ArgMem)]>]> { + let arguments = (ins PointerType:$ray_query, AttrI1:$committed); + let results = (outs V2F32:$result); + + let summary = "return the barycentrics of a triangle intersection"; + let description = [{ + Returns the barycentrics of a currently recorded triangle intersection, depending on `committed`. 
+ + If `committed` is false, behavior is undefined if the most recent use of `proceed` returned false or the current + candidate is not of triangle type. + + If `committed` is true, behavior is undefined if there is no currently committed triangle intersection. + }]; +} + +// ===================================================================================================================== +def IntersectionFrontFaceOp : LgcRtqOp<"intersection.front.face", [Memory<[(read ArgMem)]>]> { + let arguments = (ins PointerType:$ray_query, AttrI1:$committed); + let results = (outs I1:$result); + + let summary = "return whether a triangle intersection hit the front face"; + let description = [{ + Returns whether a currently recorded triangle intersection hit the front face, depending on `committed`. + + If `committed` is false, behavior is undefined if the most recent use of `proceed` returned false or the current + candidate is not of triangle type. + + If `committed` is true, behavior is undefined if there is no currently committed triangle intersection. + }]; +} + +// ===================================================================================================================== +def IntersectionCandidateAabbOpaqueOp : LgcRtqOp<"intersection.candidate.aabb.opaque", [Memory<[(read ArgMem)]>]> { + let arguments = (ins PointerType:$ray_query); + let results = (outs I1:$result); + + let summary = "return whether the current candidate AABB is opaque"; + let description = [{ + Returns whether the current candidate AABB is opaque. + + Note: SPIR-V does not explicitly allow UB here regardless of the ray query object state, so we don't either. 
+ }]; +} + +// ===================================================================================================================== +def IntersectionObjectRayDirectionOp : LgcRtqOp<"intersection.object.ray.direction", [Memory<[(read ArgMem)]>]> { + let arguments = (ins PointerType:$ray_query, AttrI1:$committed); + let results = (outs V3F32:$result); + + let summary = "return the ray direction in object space"; + let description = [{ + Returns the ray direction in object space for a recorded intersection, depending on `committed`. + + If `committed` is false, behavior is undefined if the most recent use of `proceed` returned false. + + If `committed` is true, behavior is undefined if there is no currently committed intersection. + }]; +} + +// ===================================================================================================================== +def IntersectionObjectRayOriginOp : LgcRtqOp<"intersection.object.ray.origin", [Memory<[(read ArgMem)]>]> { + let arguments = (ins PointerType:$ray_query, AttrI1:$committed); + let results = (outs V3F32:$result); + + let summary = "return the ray origin in object space"; + let description = [{ + Returns the ray origin in object space for a recorded intersection, depending on `committed`. + + If `committed` is false, behavior is undefined if the most recent use of `proceed` returned false. + + If `committed` is true, behavior is undefined if there is no currently committed intersection. + }]; +} + +// ===================================================================================================================== +def IntersectionWorldRayDirectionOp : LgcRtqOp<"intersection.world.ray.direction", [Memory<[(read ArgMem)]>]> { + let arguments = (ins PointerType:$ray_query); + let results = (outs V3F32:$result); + + let summary = "return the ray direction in world space"; + let description = [{ + Returns the ray direction in world space. 
+ }]; +} + +// ===================================================================================================================== +def IntersectionWorldRayOriginOp : LgcRtqOp<"intersection.world.ray.origin", [Memory<[(read ArgMem)]>]> { + let arguments = (ins PointerType:$ray_query); + let results = (outs V3F32:$result); + + let summary = "return the ray origin in world space"; + let description = [{ + Returns the ray origin in world space. + }]; +} + +// ===================================================================================================================== +def IntersectionObjectToWorldOp : LgcRtqOp<"intersection.object.to.world", [Memory<[(read ArgMem)]>]> { + let arguments = (ins PointerType:$ray_query, AttrI1:$committed); + let results = (outs A4V3F32:$result); + + let summary = "return the object-to-world transformation matrix"; + let description = [{ + Returns the object-to-world transformation matrix for a recorded intersection, depending on `committed`. + + If `committed` is false, behavior is undefined if the most recent use of `proceed` returned false. + + If `committed` is true, behavior is undefined if there is no currently committed intersection. + }]; +} + +// ===================================================================================================================== +def IntersectionWorldToObjectOp : LgcRtqOp<"intersection.world.to.object", [Memory<[(read ArgMem)]>]> { + let arguments = (ins PointerType:$ray_query, AttrI1:$committed); + let results = (outs A4V3F32:$result); + + let summary = "return the world-to-object transformation matrix"; + let description = [{ + Returns the world-to-object transformation matrix for a recorded intersection, depending on `committed`. + + If `committed` is false, behavior is undefined if the most recent use of `proceed` returned false. + + If `committed` is true, behavior is undefined if there is no currently committed intersection. 
+ }]; +} diff --git a/shared/continuations/include/continuations/Continuations.h b/llvmraytracing/include/llvmraytracing/Continuations.h similarity index 88% rename from shared/continuations/include/continuations/Continuations.h rename to llvmraytracing/include/llvmraytracing/Continuations.h index ca01437def..6dadf1f827 100644 --- a/shared/continuations/include/continuations/Continuations.h +++ b/llvmraytracing/include/llvmraytracing/Continuations.h @@ -70,13 +70,12 @@ // the case of RayGen, this argument is removed and replaced with a proper call // to SetupRayGen in the DXILContPostProcess pass. -#ifndef CONTINUATIONS_CONTINUATIONS_H -#define CONTINUATIONS_CONTINUATIONS_H +#pragma once #include "compilerutils/CompilerUtils.h" -#include "continuations/ContinuationsUtil.h" -#include "continuations/PayloadAccessQualifiers.h" #include "llvm-dialects/Dialect/Builder.h" +#include "llvmraytracing/ContinuationsUtil.h" +#include "llvmraytracing/PayloadAccessQualifiers.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" #include "llvm/Bitcode/BitcodeReader.h" @@ -100,6 +99,7 @@ namespace llvm { class PassBuilder; class PassManagerBuilder; +class SmallBitVector; struct CoroSplitPass; // Returns the PAQShaderStage corresponding to the given DXILShaderKind, if @@ -107,21 +107,12 @@ struct CoroSplitPass; std::optional dxilShaderKindToPAQShaderStage(DXILShaderKind ShaderKind); -/// Changes the continuation stack pointer by I and returns the old and new CSP -/// value. -std::pair moveContinuationStackOffset(IRBuilder<> &B, - int32_t I); - -/// Convert an offset to the continuation stack to a pointer into the memory -/// where the continuation stack lives. -Value *continuationStackOffsetToPtr(IRBuilder<> &B, Value *Offset, - Module &GpurtLibrary, - CompilerUtils::CrossModuleInliner &Inliner); - /// Create a new function, as cloneFunctionHeader, but include types metadata. 
Function *cloneFunctionHeaderWithTypes(Function &F, ContFuncTy &NewType, ArrayRef ArgAttrs); +Function *cloneFunctionHeaderWithTypes(Function &F, ContFuncTy &NewType, + AttributeList FnAttr); /// Remove bitcasts of function pointers in metadata. /// This also removes the DXIL payload metadata from functions. /// Returns true if something changed. @@ -149,21 +140,8 @@ Function *getAccelStructAddr(Module &M, Type *HandleTy); Function *getContinuationContinue(Module &M); /// Get the continuation.waitContinue intrinsic. Function *getContinuationWaitContinue(Module &M); -/// Get the continuation.complete intrinsic. -Function *getContinuationComplete(Module &M); /// Get the await intrinsic. Function *getContinuationAwait(Module &M, Type *TokenTy, StructType *RetTy); -/// Get the CSP init intrinsic. -Function *getContinuationCspInit(Module &M); - -/// Get the type of the continuation stack pointer. -Type *getContinuationStackOffsetType(LLVMContext &Context); - -/// Get intrinsic to get the continuation stack offset. -/// This intrinsic will be converted to an alloca, but we need to access the -/// value through multiple passes and it's difficult to re-find an alloca, so we -/// delay creating the actual alloca to a late pass. -Function *getContinuationStackOffset(Module &M); /// Get function that returns the global memory base address if the continuation /// stack lives in global memory. @@ -190,6 +168,10 @@ CallInst *replaceIntrinsicCall(IRBuilder<> &B, Type *SystemDataTy, CallInst *Call, Module *GpurtLibrary, CompilerUtils::CrossModuleInliner &Inliner); +/// Terminate a shader by inserting a return instruction and taking care of +/// basic block splitting and preventing early returns. +void terminateShader(IRBuilder<> &Builder, CallInst *CompleteCall); + /// Transformations that run early on the driver/gpurt module. /// /// Replace intrinsics called by gpurt code that can be replaced early. 
@@ -239,22 +221,17 @@ class DialectContextAnalysis class LegacyCleanupContinuationsPass : public llvm::PassInfoMixin { public: - LegacyCleanupContinuationsPass(llvm::Module *GpurtLibrary = nullptr) - : GpurtLibrary(GpurtLibrary) {} + LegacyCleanupContinuationsPass() {} llvm::PreservedAnalyses run(llvm::Module &Module, llvm::ModuleAnalysisManager &AnalysisManager); static llvm::StringRef name() { return "legacy continuation cleanup"; } - -private: - Module *GpurtLibrary; }; class CleanupContinuationsPass : public llvm::PassInfoMixin { public: - CleanupContinuationsPass(llvm::Module *GpurtLibrary = nullptr) - : GpurtLibrary(GpurtLibrary) {} + CleanupContinuationsPass() {} llvm::PreservedAnalyses run(llvm::Module &Module, llvm::ModuleAnalysisManager &AnalysisManager); @@ -286,6 +263,7 @@ class CleanupContinuationsPass void handleSingleContinue(ContinuationData &Data, CallInst *Call, Value *ResumeFun); void lowerIntrinsicCall(Module &Mod); + void lowerGetResumePoint(Module &Mod); llvm_dialects::Builder *Builder; Function *ContMalloc; @@ -297,15 +275,11 @@ class CleanupContinuationsPass class LowerRaytracingPipelinePass : public llvm::PassInfoMixin { public: - LowerRaytracingPipelinePass(llvm::Module *GpurtLibrary = nullptr) - : GpurtLibrary(GpurtLibrary) {} + LowerRaytracingPipelinePass() {} llvm::PreservedAnalyses run(llvm::Module &Module, llvm::ModuleAnalysisManager &AnalysisManager); static llvm::StringRef name() { return "Lower raytracing pipeline pass"; } - -private: - Module *GpurtLibrary; }; class DXILContIntrinsicPreparePass @@ -323,15 +297,11 @@ class DXILContIntrinsicPreparePass class DXILContPostProcessPass : public llvm::PassInfoMixin { public: - DXILContPostProcessPass(llvm::Module *GpurtLibrary = nullptr) - : GpurtLibrary(GpurtLibrary) {} + DXILContPostProcessPass() {} llvm::PreservedAnalyses run(llvm::Module &Module, llvm::ModuleAnalysisManager &AnalysisManager); static llvm::StringRef name() { return "DXIL continuation post processing"; } - -private: - 
Module *GpurtLibrary; }; class LowerAwaitPass : public llvm::PassInfoMixin { @@ -375,23 +345,6 @@ class RegisterBufferPass : public llvm::PassInfoMixin { uint32_t TotalElementCount; }; -class SaveContinuationStatePass - : public llvm::PassInfoMixin { -public: - SaveContinuationStatePass() = default; - llvm::PreservedAnalyses run(llvm::Module &Module, - llvm::ModuleAnalysisManager &AnalysisManager); - - static llvm::StringRef name() { return "save continuation state"; } - -private: - void lowerCsp(Function *GetCsp); - - Type *I32; - IRBuilder<> *B; - Module *Mod; -}; - // No-op pass running before the DXIL continuations pipeline, e.g. for usage // with -print-after class DXILContPreHookPass : public llvm::PassInfoMixin { @@ -491,6 +444,14 @@ class DXILContLgcRtOpConverterPass void setupLocalRootIndex(Function *F); }; +Function *promotePointerArguments(Function *Fn, + const SmallBitVector &PromotionMask); + +/// Replace struct return type with its first element type. +Function *unpackStructReturnType(Function *Fn); +/// Turn StructRet argument into return type. +Function *lowerStructRetArgument(Function *Fn); + /// Add necessary continuation transform passes for LGC. 
void addLgcContinuationTransform(ModulePassManager &MPM); @@ -499,5 +460,3 @@ void DXILValueTypeMetadataCallback(Value *V, unsigned TypeID, GetTypeByIDTy GetTypeByID, GetContainedTypeIDTy GetContainedTypeID); } // namespace llvm - -#endif diff --git a/shared/continuations/include/continuations/ContinuationsDialect.h b/llvmraytracing/include/llvmraytracing/ContinuationsDialect.h similarity index 99% rename from shared/continuations/include/continuations/ContinuationsDialect.h rename to llvmraytracing/include/llvmraytracing/ContinuationsDialect.h index 206044c7a2..fa74b7710f 100644 --- a/shared/continuations/include/continuations/ContinuationsDialect.h +++ b/llvmraytracing/include/llvmraytracing/ContinuationsDialect.h @@ -25,6 +25,8 @@ //===- ContinuationsDialect.h - Dialect definitions -----------------------===// +#pragma once + #define GET_INCLUDES #define GET_DIALECT_DECLS #include "ContinuationsDialect.h.inc" diff --git a/shared/continuations/include/continuations/ContinuationsDialect.td b/llvmraytracing/include/llvmraytracing/ContinuationsDialect.td similarity index 100% rename from shared/continuations/include/continuations/ContinuationsDialect.td rename to llvmraytracing/include/llvmraytracing/ContinuationsDialect.td diff --git a/shared/continuations/include/continuations/ContinuationsUtil.h b/llvmraytracing/include/llvmraytracing/ContinuationsUtil.h similarity index 84% rename from shared/continuations/include/continuations/ContinuationsUtil.h rename to llvmraytracing/include/llvmraytracing/ContinuationsUtil.h index 84fb70b163..6a158ea2de 100644 --- a/shared/continuations/include/continuations/ContinuationsUtil.h +++ b/llvmraytracing/include/llvmraytracing/ContinuationsUtil.h @@ -29,15 +29,13 @@ // //===----------------------------------------------------------------------===// -#ifndef CONTINUATIONS_CONTINUATIONS_UTIL_H -#define CONTINUATIONS_CONTINUATIONS_UTIL_H +#pragma once #include "lgc/LgcCpsDialect.h" #include "lgc/LgcRtDialect.h" #include 
"llpc/GpurtEnums.h" #include "llvm-dialects/Dialect/OpMap.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Dominators.h" @@ -47,7 +45,6 @@ #include "llvm/IR/PassManager.h" #include "llvm/Support/ErrorHandling.h" #include -#include #include #include #include @@ -222,22 +219,36 @@ class ContHelper { // for PAQed fields, and all other data required in a particular stage (e.g. // hit attributes). // - // MinPayloadRegisterCount: - // The minimum required number of payload registers, which is used to pass - // inter-module data into a module. Ensures a minimum size of the generated - // payload global, and thereby the payload size used to annotate Traversal and - // when lowering payload access intrinsics. This relies on the relevant copy - // of Traversal being processed last, after all app shader modules. This value - // is *not* guaranteed to be sufficiently large to account for payloads - // occurring in other pipelines that currently compiled shaders may be used - // with. Thus, we currently do *not* use it to annotate Intersection shaders. - static constexpr const char *MDMinPayloadRegisterCountName = - "continuation.minPayloadRegisterCount"; + // [in] PreservedPayloadRegisterCount: + // The required number of preserved payload registers for functions that + // are not aware of payload types (e.g. Intersection or Traversal), if known. + // This gives an upper bound on the number of payload registers used by other + // functions together with functions in the current module. + // Setting this value can be used to reduce the number of preserved registers + // for such functions to prevent having to preserve the maximum possible + // amount of payload registers. This is used when compiling a specialized + // Traversal function for a pipeline after all shaders in the pipeline have + // been processed. 
+ // For intersection, it is not used, because early-compiled intersection + // shaders can be used in pipelines with large payload types unknown when + // compiling the intersection shader. + static constexpr const char *MDPreservedPayloadRegisterCountName = + "continuation.preservedPayloadRegisterCount"; + // [in] MaxPayloadRegisterCount // The maximum allowed number of payload registers to be used for payload and // other inter-stage date (e.g. attributes). If state does not fit into this // limit, we spill to the continuation stack. static constexpr const char *MDMaxPayloadRegisterCountName = "continuation.maxPayloadRegisterCount"; + // [out] MaxUsedPayloadRegisterCount + // The maximum number of payload registers written or read by any + // shader in the module. This excludes intersection shaders, which + // just pass through an existing payload. + // This can be used to populate PreservedPayloadRegisterCount when compiling + // the driver module in case all modules of the pipeline are known and + // have already been processed. + static constexpr const char *MDMaxUsedPayloadRegisterCountName = + "continuation.maxUsedPayloadRegisterCount"; // The address space used to store the continuations stack. // The possible values for this metadata are the values of ContStackAddrspace. static constexpr const char *MDStackAddrspaceName = @@ -258,6 +269,10 @@ class ContHelper { static constexpr const char *MDMaxPayloadBytesName = "continuation.maxPayloadBytes"; + // Whether this is a load instruction that should translate to a last_use + // load. + static constexpr const char *MDIsLastUseName = "amdgpu.last.use"; + static std::optional extractZExtI32Constant(MDNode *Node) { if (Node) { uint64_t Result = @@ -309,8 +324,7 @@ class ContHelper { static void RegisterPasses(llvm::PassBuilder &PB, bool NeedDialectContext); // Registers the generic Continuation pipeline to a LLVM Module Pass manager. 
- static void addContinuationPasses(llvm::ModulePassManager &MPM, - llvm::Module *GpurtLibrary); + static void addContinuationPasses(llvm::ModulePassManager &MPM); // Registers the DXIL-specific Continuation pipeline to a LLVM Module Pass // manager. @@ -364,23 +378,50 @@ class ContHelper { // If there is module-level metadata node, return its value. Otherwise, return // std::nullopt. static std::optional - tryGetMinPayloadRegisterCount(const Module &M) { - auto *MD = M.getNamedMetadata(MDMinPayloadRegisterCountName); + tryGetPreservedPayloadRegisterCount(const Module &M) { + auto *MD = M.getNamedMetadata(MDPreservedPayloadRegisterCountName); if (!MD) return {}; return extractZExtI32Constant(MD->getOperand(0)); }; - static void setMinPayloadRegisterCount(Module &M, - uint32_t MinPayloadRegisterCount) { - auto *MD = M.getOrInsertNamedMetadata(MDMinPayloadRegisterCountName); + static void + setPreservedPayloadRegisterCount(Module &M, + uint32_t PreservedPayloadRegisterCount) { + auto *MD = M.getOrInsertNamedMetadata(MDPreservedPayloadRegisterCountName); assert(MD && "Failed to create metadata node!"); MD->clearOperands(); - MD->addOperand(getI32MDConstant(M.getContext(), MinPayloadRegisterCount)); + MD->addOperand( + getI32MDConstant(M.getContext(), PreservedPayloadRegisterCount)); + } + + // Old alias until clients are migrated to setPreservedPayloadRegisterCount: + static void + setMinPayloadRegisterCount(Module &M, + uint32_t PreservedPayloadRegisterCount) { + setPreservedPayloadRegisterCount(M, PreservedPayloadRegisterCount); } // If there is module-level metadata specifying the maximum number // of payload registers, return that value. Otherwise, return std::nullopt. 
+ static std::optional + tryGetMaxUsedPayloadRegisterCount(const Module &M) { + auto *MD = M.getNamedMetadata(MDMaxUsedPayloadRegisterCountName); + if (!MD) + return {}; + return extractZExtI32Constant(MD->getOperand(0)); + }; + + static void + setMaxUsedPayloadRegisterCount(Module &M, + uint32_t MaxUsedPayloadRegisterCount) { + auto *MD = M.getOrInsertNamedMetadata(MDMaxUsedPayloadRegisterCountName); + assert(MD && "Failed to create metadata node!"); + MD->clearOperands(); + MD->addOperand( + getI32MDConstant(M.getContext(), MaxUsedPayloadRegisterCount)); + } + static std::optional tryGetMaxPayloadRegisterCount(const Module &M) { auto *MD = M.getNamedMetadata(MDMaxPayloadRegisterCountName); @@ -397,22 +438,10 @@ class ContHelper { MD->addOperand(getI32MDConstant(M.getContext(), MaxPayloadRegisterCount)); } - // Returns the number of payload registers used in this module. - // Only available after having finished continuation passes. static std::optional tryGetPayloadRegisterCount(const Module &M) { - auto *Registers = M.getGlobalVariable(GlobalRegistersName); - if (!Registers) - return {}; - const uint32_t NumPayloadRegistersI32s = - Registers->getValueType()->getArrayNumElements(); - assert(NumPayloadRegistersI32s >= - tryGetMinPayloadRegisterCount(M).value_or(NumPayloadRegistersI32s)); - assert(NumPayloadRegistersI32s <= - tryGetMaxPayloadRegisterCount(M).value_or(NumPayloadRegistersI32s)); - return NumPayloadRegistersI32s; + return tryGetMaxUsedPayloadRegisterCount(M); } - // TODO: Remove this once dxcp calls the lgc::rt function directly. 
static void setMaxHitAttributeByteCount(Function &F, uint32_t MaxHitAttributeByteCount) { lgc::rt::setShaderHitAttributeSize(&F, MaxHitAttributeByteCount); @@ -521,6 +550,12 @@ class ContHelper { " metadata not found on CallInst!"); } + static void setPayloadTypeMetadata(Instruction *I, Type *T) { + I->setMetadata(ContHelper::MDContPayloadTyName, + MDNode::get(I->getContext(), + {ConstantAsMetadata::get(PoisonValue::get(T))})); + } + static bool isLgcCpsModule(Module &Mod) { return Mod.getNamedMetadata(MDLgcCpsModuleName) != nullptr; } @@ -540,6 +575,13 @@ class ContHelper { CI.setMetadata(ContHelper::MDIsWaitAwaitName, nullptr); } + // Specifies that this is a load that marks a last use of the pointer it loads + // from. + static void setIsLastUseLoad(LoadInst &Load) { + Load.setMetadata(ContHelper::MDIsLastUseName, + MDTuple::get(Load.getContext(), {})); + } + /// Returns true if a call to the given function should be rematerialized /// in a shader of the specified kind. /// @@ -547,6 +589,10 @@ class ContHelper { static bool isRematerializableLgcRtOp(CallInst &CInst, std::optional Kind = std::nullopt); + + static bool isLegacyEntryFunction(Function *Func) { + return Func->hasMetadata(MDEntryName); + } }; class ShaderStageHelper final { @@ -598,8 +644,30 @@ class ShaderStageHelper final { } }; -// Until all users have been migrated, provide old name as well: -class DXILContHelper : public ContHelper {}; +namespace ContDriverFunc { +#define DRIVER_FUNC_NAME(KEY) constexpr const char *KEY##Name = "_cont_" #KEY; +DRIVER_FUNC_NAME(GetContinuationStackGlobalMemBase) +DRIVER_FUNC_NAME(GetTriangleHitAttributes) +DRIVER_FUNC_NAME(SetTriangleHitAttributes) +DRIVER_FUNC_NAME(GetCandidateState) +DRIVER_FUNC_NAME(GetCommittedState) +DRIVER_FUNC_NAME(GetContinuationStackAddr) +DRIVER_FUNC_NAME(SetupRayGen) +DRIVER_FUNC_NAME(IsEndSearch) +DRIVER_FUNC_NAME(GetLocalRootIndex) +DRIVER_FUNC_NAME(SetLocalRootIndex) +DRIVER_FUNC_NAME(TraceRay) +DRIVER_FUNC_NAME(CallShader) 
+DRIVER_FUNC_NAME(ReportHit) +DRIVER_FUNC_NAME(AcceptHit) +DRIVER_FUNC_NAME(GetSbtAddress) +DRIVER_FUNC_NAME(GetSbtStride) +DRIVER_FUNC_NAME(HitKind) +DRIVER_FUNC_NAME(Traversal) +DRIVER_FUNC_NAME(KernelEntry) + +#undef DRIVER_FUNC_NAME +} // namespace ContDriverFunc /// Free-standing helpers. @@ -670,6 +738,9 @@ void forwardContinuationFrameStoreToLoad(DominatorTree &DT, Value *FramePtr); // Returns a typed pointer type if the pointer type is typed. PointerType *getWithSamePointeeType(PointerType *PtrTy, unsigned AddressSpace); +/// Look for the continue call that is dominated by the call to +/// GetResumePointAddr. Due to saving the payload before, many basic blocks may +/// have been inserted, traverse them while making sure that this +/// GetResumePointAddr is the only possible predecessor. +std::optional findDominatedContinueCall(CallInst *GetResPointAddr); } // namespace llvm - -#endif diff --git a/shared/continuations/include/continuations/CpsStackLowering.h b/llvmraytracing/include/llvmraytracing/CpsStackLowering.h similarity index 97% rename from shared/continuations/include/continuations/CpsStackLowering.h rename to llvmraytracing/include/llvmraytracing/CpsStackLowering.h index f0b5eab9c1..87453c3f48 100644 --- a/shared/continuations/include/continuations/CpsStackLowering.h +++ b/llvmraytracing/include/llvmraytracing/CpsStackLowering.h @@ -85,6 +85,10 @@ class CpsStackLowering { this->BasePointer = BasePointer; } + static unsigned getContinuationStackAlignment() { + return ContinuationStackAlignment; + } + TypeLowering TypeLower; private: diff --git a/shared/continuations/include/continuations/GpurtContext.h b/llvmraytracing/include/llvmraytracing/GpurtContext.h similarity index 92% rename from shared/continuations/include/continuations/GpurtContext.h rename to llvmraytracing/include/llvmraytracing/GpurtContext.h index bb5df3a12a..277c236280 100644 --- a/shared/continuations/include/continuations/GpurtContext.h +++ 
b/llvmraytracing/include/llvmraytracing/GpurtContext.h @@ -54,7 +54,10 @@ class GpurtContext : public llvm_dialects::ContextExtensionImpl { static Key theKey; - std::unique_ptr theModule; + // Pointer to ownedTheModule if exists, or pointer to a module owned + // externally (e.g., by driver) + llvm::Module *theModule = nullptr; + std::unique_ptr ownedTheModule; }; } // namespace lgc diff --git a/shared/continuations/include/continuations/PayloadAccessQualifiers.h b/llvmraytracing/include/llvmraytracing/PayloadAccessQualifiers.h similarity index 99% rename from shared/continuations/include/continuations/PayloadAccessQualifiers.h rename to llvmraytracing/include/llvmraytracing/PayloadAccessQualifiers.h index 79b58e9996..cb744d811c 100644 --- a/shared/continuations/include/continuations/PayloadAccessQualifiers.h +++ b/llvmraytracing/include/llvmraytracing/PayloadAccessQualifiers.h @@ -30,10 +30,9 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_TRANSFORMS_PAYLOADACCESSQUALIFIERS_H -#define LLVM_TRANSFORMS_PAYLOADACCESSQUALIFIERS_H +#pragma once -#include "continuations/ContinuationsUtil.h" +#include "llvmraytracing/ContinuationsUtil.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/EnumeratedArray.h" #include "llvm/ADT/MapVector.h" @@ -974,5 +973,3 @@ class PAQSerializationInfoManager { }; } // namespace llvm - -#endif diff --git a/shared/continuations/lib/CleanupContinuations.cpp b/llvmraytracing/lib/CleanupContinuations.cpp similarity index 94% rename from shared/continuations/lib/CleanupContinuations.cpp rename to llvmraytracing/lib/CleanupContinuations.cpp index 4efb803a97..089df84842 100644 --- a/shared/continuations/lib/CleanupContinuations.cpp +++ b/llvmraytracing/lib/CleanupContinuations.cpp @@ -57,10 +57,11 @@ //===----------------------------------------------------------------------===// #include "compilerutils/CompilerUtils.h" -#include "continuations/Continuations.h" -#include 
"continuations/ContinuationsDialect.h" #include "lgc/LgcCpsDialect.h" #include "llvm-dialects/Dialect/Visitor.h" +#include "llvmraytracing/Continuations.h" +#include "llvmraytracing/ContinuationsDialect.h" +#include "llvmraytracing/GpurtContext.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" @@ -193,6 +194,7 @@ void CleanupContinuationsPass::updateCpsStack(Function *F, Function *NewFunc, if (IsStart) { CpsStack = Builder->create( Builder->getInt32(CpsInfo.ContStateBytes)); + CpsStack->setName("cont.state.stack.segment"); } else { CpsStack = Builder->create(Builder->getInt32(CpsInfo.ContStateBytes)); @@ -396,7 +398,6 @@ void CleanupContinuationsPass::processContinuations() { auto *Call = cast(--I->getIterator()); auto *Called = Call->getCalledFunction(); if (Called->getName() == "continuation.return") { - assert(Call->arg_empty() && "Should have no argument\n"); Builder->SetInsertPoint(Call); Builder->CreateRetVoid(); Call->eraseFromParent(); @@ -556,6 +557,28 @@ void CleanupContinuationsPass::lowerIntrinsicCall(Module &Mod) { } } +void CleanupContinuationsPass::lowerGetResumePoint(Module &Mod) { + auto *GetResumePoint = Mod.getFunction("_AmdGetResumePointAddr"); + if (!GetResumePoint) + return; + + for (auto &Use : make_early_inc_range(GetResumePoint->uses())) { + auto *GetResumeCall = dyn_cast(Use.getUser()); + // Get the lgc.cps.jump that is dominated by this _AmdGetResumePointAddr + // call. + auto JumpCall = findDominatedContinueCall(GetResumeCall); + assert(JumpCall && "Should find a dominated call to lgc.cps.jump"); + Value *ResumeFn = *cast(*JumpCall)->getTail().begin(); + assert(ResumeFn && isa(ResumeFn)); + // We can always move this as.continuation.reference call. 
+ cast(ResumeFn)->moveBefore(GetResumeCall); + Builder->SetInsertPoint(GetResumeCall); + auto *ResumePtr = Builder->CreateZExt(ResumeFn, Builder->getInt64Ty()); + GetResumeCall->replaceAllUsesWith(ResumePtr); + GetResumeCall->eraseFromParent(); + } +} + llvm::PreservedAnalyses CleanupContinuationsPass::run(llvm::Module &Mod, llvm::ModuleAnalysisManager &AnalysisManager) { @@ -568,6 +591,7 @@ CleanupContinuationsPass::run(llvm::Module &Mod, MaxContStateBytes = 0; ContMalloc = Mod.getFunction("continuation.malloc"); ContFree = Mod.getFunction("continuation.free"); + GpurtLibrary = GpurtContext::get(Mod.getContext()).theModule; llvm_dialects::Builder B(Mod.getContext()); Builder = &B; @@ -626,6 +650,8 @@ CleanupContinuationsPass::run(llvm::Module &Mod, processContinuations(); // Lower lgc.rt intrinsics lowerIntrinsicCall(Mod); + + lowerGetResumePoint(Mod); return PreservedAnalyses::none(); } return PreservedAnalyses::all(); diff --git a/shared/continuations/lib/Continuations.cpp b/llvmraytracing/lib/Continuations.cpp similarity index 78% rename from shared/continuations/lib/Continuations.cpp rename to llvmraytracing/lib/Continuations.cpp index 3b22bec31f..b5f1d8eaa6 100644 --- a/shared/continuations/lib/Continuations.cpp +++ b/llvmraytracing/lib/Continuations.cpp @@ -29,18 +29,21 @@ // passes. 
//===----------------------------------------------------------------------===// -#include "continuations/Continuations.h" +#include "llvmraytracing/Continuations.h" #include "compilerutils/CompilerUtils.h" -#include "continuations/ContinuationsDialect.h" -#include "continuations/ContinuationsUtil.h" #include "lgc/LgcCpsDialect.h" #include "lgc/LgcRtDialect.h" #include "llvm-dialects/Dialect/Builder.h" #include "llvm-dialects/Dialect/Dialect.h" #include "llvm-dialects/Dialect/OpSet.h" +#include "llvmraytracing/ContinuationsDialect.h" +#include "llvmraytracing/ContinuationsUtil.h" +#include "llvmraytracing/GpurtContext.h" #include "llvm/ADT/IntervalTree.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallBitVector.h" #include "llvm/IR/Function.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/PassManager.h" @@ -54,6 +57,7 @@ #include "llvm/Transforms/Scalar/InstSimplifyPass.h" #include "llvm/Transforms/Scalar/SROA.h" #include "llvm/Transforms/Scalar/SimplifyCFG.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/FixIrreducible.h" #include "llvm/Transforms/Utils/LowerSwitch.h" @@ -90,6 +94,7 @@ const llvm_dialects::OpMap llvm::LgcRtGpuRtMap = {{ GPURTMAP_ENTRY(CallCallableShaderOp, "CallShader", false), GPURTMAP_ENTRY(PrimitiveIndexOp, "PrimitiveIndex", true), GPURTMAP_ENTRY(GeometryIndexOp, "GeometryIndex", true), + GPURTMAP_ENTRY(InstanceInclusionMaskOp, "InstanceInclusionMask", false), }}; #undef GPURTMAP_ENTRY @@ -159,7 +164,7 @@ bool ContHelper::isRematerializableLgcRtOp(CallInst &CInst, InstanceIdOp, InstanceIndexOp, GeometryIndexOp, ObjectRayDirectionOp, ObjectRayOriginOp, ObjectToWorldOp, PrimitiveIndexOp, RayFlagsOp, RayTminOp, WorldRayDirectionOp, - WorldRayOriginOp, WorldToObjectOp>(); + WorldRayOriginOp, WorldToObjectOp, InstanceInclusionMaskOp>(); if (RematerializableIntersectionDialectOps.contains(*Callee)) return true; } @@ -573,12 
+578,11 @@ void ContHelper::RegisterPasses(PassBuilder &PB, bool NeedDialectContext) { } } -void ContHelper::addContinuationPasses(ModulePassManager &MPM, - Module *GpurtLibrary) { +void ContHelper::addContinuationPasses(ModulePassManager &MPM) { // Inline functions into shaders, so everything is in a shader MPM.addPass(AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/false)); - MPM.addPass(LowerRaytracingPipelinePass(GpurtLibrary)); + MPM.addPass(LowerRaytracingPipelinePass()); // Convert the system data struct to a value, so it isn't stored in the // continuation state @@ -591,12 +595,9 @@ void ContHelper::addContinuationPasses(ModulePassManager &MPM, MPM.addPass(createModuleToFunctionPassAdaptor(CoroElidePass())); MPM.addPass(CoroCleanupPass()); - MPM.addPass(LegacyCleanupContinuationsPass(GpurtLibrary)); + MPM.addPass(LegacyCleanupContinuationsPass()); MPM.addPass(RegisterBufferPass()); - MPM.addPass(SaveContinuationStatePass()); - MPM.addPass(DXILContPostProcessPass(GpurtLibrary)); - - MPM.addPass(RemoveTypesMetadataPass()); + MPM.addPass(DXILContPostProcessPass()); // The FixIrreducible pass does not cope with switch instructions, so lower // them before. @@ -614,13 +615,19 @@ void ContHelper::addContinuationPasses(ModulePassManager &MPM, void ContHelper::addDxilContinuationPasses(ModulePassManager &MPM, Module *GpurtLibrary) { + if (GpurtLibrary) { + // Set up GpurtContext so that later passes can access the library via it. 
+ auto &GpurtContext = lgc::GpurtContext::get(GpurtLibrary->getContext()); + GpurtContext.theModule = GpurtLibrary; + } + MPM.addPass(DXILContPreHookPass()); // Translate dx.op intrinsic calls to lgc.rt dialect intrinsic calls MPM.addPass(DXILContLgcRtOpConverterPass()); // Add the generic continuations pipeline - addContinuationPasses(MPM, GpurtLibrary); + addContinuationPasses(MPM); // Remove dead instructions using the continuation token, which the translator // can't translate @@ -664,51 +671,6 @@ DialectContextAnalysis::run(llvm::Module &M, return DialectContextAnalysis::Result(); } -std::pair llvm::moveContinuationStackOffset(IRBuilder<> &B, - int32_t I) { - // %cont.frame.mem = load i32, i32* %csp - // %newcsp = add i32 %cont.frame.mem, I - // store i32 %newcsp, i32 %csp - - auto *CspType = getContinuationStackOffsetType(B.getContext()); - auto *Csp = B.CreateCall( - getContinuationStackOffset(*B.GetInsertPoint()->getModule())); - auto *OldCsp = B.CreateLoad(CspType, Csp); - auto *NewCsp = B.CreateAdd(OldCsp, B.getInt32(I)); - B.CreateStore(NewCsp, Csp); - - return std::make_pair(OldCsp, NewCsp); -} - -Value * -llvm::continuationStackOffsetToPtr(IRBuilder<> &B, Value *Offset, - Module &GpurtLibrary, - CompilerUtils::CrossModuleInliner &Inliner) { - assert(Offset->getType()->isIntegerTy(32) && - "Stack offset is expected to be an i32"); - Module *M = B.GetInsertPoint()->getModule(); - std::optional StackAddrspace = - ContHelper::tryGetStackAddrspace(*M); - if (!StackAddrspace) - report_fatal_error("Missing stack addrspace metadata!"); - if (*StackAddrspace == ContStackAddrspace::Scratch) - return B.CreateIntToPtr( - Offset, - B.getInt8Ty()->getPointerTo(static_cast(*StackAddrspace))); - - // Stack lives in global memory, so add the base address - assert(*StackAddrspace == ContStackAddrspace::Global && - "Unexpected address space of the continuation stack"); - auto *PtrTy = - B.getInt8Ty()->getPointerTo(static_cast(*StackAddrspace)); - auto *BaseAddr = - 
Inliner.inlineCall(B, getContinuationStackGlobalMemBase(GpurtLibrary)) - .returnValue; - auto *BaseAddrPtr = B.CreateIntToPtr(BaseAddr, PtrTy); - - return B.CreateGEP(B.getInt8Ty(), BaseAddrPtr, Offset); -} - Function *llvm::cloneFunctionHeaderWithTypes(Function &F, ContFuncTy &NewType, ArrayRef ArgAttrs) { FunctionType *FuncTy = NewType.asFunctionType(F.getContext()); @@ -717,6 +679,14 @@ Function *llvm::cloneFunctionHeaderWithTypes(Function &F, ContFuncTy &NewType, return NewFunc; } +Function *llvm::cloneFunctionHeaderWithTypes(Function &F, ContFuncTy &NewType, + AttributeList FnAttr) { + FunctionType *FuncTy = NewType.asFunctionType(F.getContext()); + Function *NewFunc = CompilerUtils::cloneFunctionHeader(F, FuncTy, FnAttr); + NewType.writeMetadata(NewFunc); + return NewFunc; +} + static bool stripMDCasts(MDTuple *MDTup) { bool Changed = false; for (unsigned I = 0; I < MDTup->getNumOperands(); I++) { @@ -771,29 +741,9 @@ bool llvm::fixupDxilMetadata(Module &M) { return Changed; } -Type *llvm::getContinuationStackOffsetType(LLVMContext &Context) { - return IntegerType::getInt32Ty(Context); -} - -Function *llvm::getContinuationStackOffset(Module &M) { - StringRef Name = "continuation.getContinuationStackOffset"; - if (auto *F = M.getFunction(Name)) - return F; - auto &C = M.getContext(); - AttributeList AL = AttributeList::get( - C, AttributeList::FunctionIndex, - {Attribute::NoFree, Attribute::NoRecurse, Attribute::NoSync, - Attribute::NoUnwind, Attribute::Speculatable, Attribute::WillReturn}); - auto *Func = cast( - M.getOrInsertFunction(Name, AL, - getContinuationStackOffsetType(C)->getPointerTo()) - .getCallee()); - Func->setDoesNotAccessMemory(); - return Func; -} - Function *llvm::getContinuationStackGlobalMemBase(Module &M) { - auto *F = M.getFunction("_cont_GetContinuationStackGlobalMemBase"); + auto *F = + M.getFunction(ContDriverFunc::GetContinuationStackGlobalMemBaseName); assert(F && "Could not find GetContinuationStackGlobalMemBase function"); 
assert(F->arg_size() == 0 && F->getReturnType()->isIntegerTy(64)); return F; @@ -814,7 +764,7 @@ bool llvm::isCastGlobal(GlobalValue *Global, Value *V) { uint64_t llvm::getInlineHitAttrsBytes(Module &M) { const DataLayout &DL = M.getDataLayout(); auto *GetTriangleHitAttributes = - M.getFunction("_cont_GetTriangleHitAttributes"); + M.getFunction(ContDriverFunc::GetTriangleHitAttributesName); assert(GetTriangleHitAttributes && "Could not find GetTriangleHitAttributes function"); auto *InlineHitAttrsTy = GetTriangleHitAttributes->getReturnType(); @@ -888,6 +838,19 @@ Function *llvm::getAccelStructAddr(Module &M, Type *HandleTy) { return Func; } +Function *llvm::getContinuationContinue(Module &M) { + auto *Name = "continuation.continue"; + if (auto *F = M.getFunction(Name)) + return F; + auto &C = M.getContext(); + auto *Void = Type::getVoidTy(C); + auto *I64 = Type::getInt64Ty(C); + auto *FuncTy = FunctionType::get(Void, {I64}, true); + AttributeList AL = AttributeList::get(C, AttributeList::FunctionIndex, + {Attribute::NoReturn}); + return cast(M.getOrInsertFunction(Name, FuncTy, AL).getCallee()); +} + Function *llvm::extractFunctionOrNull(Metadata *N) { auto *C = mdconst::extract_or_null(N); // Strip bitcasts @@ -957,7 +920,7 @@ llvm::replaceIntrinsicCall(IRBuilder<> &B, Type *SystemDataTy, if (Kind == DXILShaderKind::AnyHit || Kind == DXILShaderKind::Intersection) { auto *GetCandidateState = - GpurtLibrary->getFunction("_cont_GetCandidateState"); + GpurtLibrary->getFunction(ContDriverFunc::GetCandidateStateName); assert(GetCandidateState && "Could not find GetCandidateState function"); assert( GetCandidateState->getReturnType()->isStructTy() && @@ -968,7 +931,7 @@ llvm::replaceIntrinsicCall(IRBuilder<> &B, Type *SystemDataTy, GetHitData = GetCandidateState; } else { auto *GetCommittedState = - GpurtLibrary->getFunction("_cont_GetCommittedState"); + GpurtLibrary->getFunction(ContDriverFunc::GetCommittedStateName); assert(GetCommittedState && "Could not find 
GetCommittedState function"); assert( GetCommittedState->getReturnType()->isStructTy() && @@ -1110,6 +1073,40 @@ static void handleGetUninitialized(Function &Func) { }); } +void llvm::terminateShader(IRBuilder<> &Builder, CallInst *CompleteCall) { + Builder.SetInsertPoint(CompleteCall); + + [[maybe_unused]] Instruction *OldTerminator = + CompleteCall->getParent()->getTerminator(); + Type *FuncRetTy = CompleteCall->getFunction()->getReturnType(); + // During the driver transform, this will see a _cont_SetupRayGen which + // returns _AmdDispatchSystemData. Thus, we return a poison. Resume functions + // and other shaders will simply return a void value when this helper is being + // called from LegacyCleanupContinuations. These will be treated as + // continuation.complete by the translator. + ReturnInst *Ret = nullptr; + if (FuncRetTy->isVoidTy()) + Ret = Builder.CreateRetVoid(); + else + Ret = Builder.CreateRet(PoisonValue::get(FuncRetTy)); + + assert(OldTerminator != CompleteCall && + "terminateShader: Invalid terminator instruction provided!"); + + // If there is some code after the call to _AmdComplete or the intended + // continuation.return that aborts the shader, do the following: + // - Split everything after the completion call into a separate block + // - Remove the newly inserted unconditional branch to the split block + // - Remove the complete call. + // This is intended to work for _AmdComplete appearing in conditional code + // or the unreachable inserted by various passes before + // LegacyCleanupContinuations. + SplitBlock(CompleteCall->getParent(), CompleteCall); + // Remove the branch to the split block. 
+ Ret->getParent()->getTerminator()->eraseFromParent(); + CompleteCall->eraseFromParent(); +} + bool llvm::earlyDriverTransform(Module &M) { // Import StackAddrspace from metadata if set, otherwise from default auto StackAddrspaceMD = ContHelper::tryGetStackAddrspace(M); @@ -1129,8 +1126,6 @@ bool llvm::earlyDriverTransform(Module &M) { Replacement = getContinuationWaitContinue(M); else if (Name.contains("Enqueue")) Replacement = getContinuationContinue(M); - else if (Name.contains("Complete")) - Replacement = getContinuationComplete(M); if (Replacement) { Changed = true; @@ -1256,6 +1251,258 @@ bool llvm::LgcMaterializable(Instruction &OrigI) { return false; } +std::optional +llvm::findDominatedContinueCall(CallInst *GetResPointAddr) { + SmallDenseSet Visited; + SmallDenseSet UnknownPreds; + SmallVector WorkList; + CallInst *Candidate = nullptr; + Visited.insert(GetResPointAddr->getParent()); + WorkList.push_back(GetResPointAddr->getParent()); + + while (!WorkList.empty()) { + auto *BB = WorkList.pop_back_val(); + // Check predecessors + if (BB != GetResPointAddr->getParent()) { + for (auto *Pred : predecessors(BB)) { + if (!Visited.contains(Pred)) + UnknownPreds.insert(Pred); + } + } + + auto *Terminator = BB->getTerminator(); + if (isa_and_nonnull(Terminator)) { + auto Before = --Terminator->getIterator(); + if (auto *ContinueCall = dyn_cast(Before)) { + if (Candidate != nullptr) { + LLVM_DEBUG(dbgs() << "Found multiple continue candidates after a " + "GetResumePointAddr:\n"; + Candidate->dump(); ContinueCall->dump()); + return {}; + } + Candidate = ContinueCall; + } else { + LLVM_DEBUG(dbgs() << "The BB must end in a (continue) call after a " + "GetResumePointAddr, but " + << BB->getName() << " doesn't"); + return {}; + } + } + + for (auto *Succ : successors(BB)) { + if (Visited.contains(Succ)) + continue; + Visited.insert(Succ); + UnknownPreds.erase(Succ); + WorkList.push_back(Succ); + } + } + + if (Candidate == nullptr) { + LLVM_DEBUG( + dbgs() << "Did not 
find a continue call after a GetResumePointAddr\n"); + return {}; + } + + if (!UnknownPreds.empty()) { + LLVM_DEBUG(dbgs() << "Found more than one predecessor for the continue " + "call after a GetResumePointAddr:\n"; + for (auto *Pred + : UnknownPreds) Pred->dump();); + return {}; + } + + return Candidate; +} + +/// Copy the function body from the old function. +static Function *cloneFunctionWithTypes(Function *Fn, ContFuncTy NewFnTy, + AttributeList FnAttrs) { + // Erase outdated types metadata to avoid being propagated to the new + // function. + Fn->eraseMetadata(Fn->getContext().getMDKindID(ContHelper::MDTypesName)); + Function *NewFn = cloneFunctionHeaderWithTypes(*Fn, NewFnTy, FnAttrs); + NewFn->splice(NewFn->begin(), Fn); + NewFn->takeName(Fn); + Fn->replaceAllUsesWith(ConstantExpr::getBitCast(NewFn, Fn->getType())); + return NewFn; +} + +/// Promote pointer argument type to its value type if the corresponding bit in +/// `PromotionMask` is being set. +Function *llvm::promotePointerArguments(Function *Fn, + const SmallBitVector &PromotionMask) { + SmallVector ArgTys; + SmallVector ParamAttrs; + + // Do nothing if the promotion mask is zero. + if (PromotionMask.none()) + return Fn; + + auto FnAttrs = Fn->getAttributes(); + // The function might not have types metadata like _cont_SetupRayGen, in which + // case nothing needs to be done. + if (!Fn->getMetadata(ContHelper::MDTypesName)) + return Fn; + + for (const auto &[ArgNo, Arg] : llvm::enumerate(Fn->args())) { + ContArgTy ArgTy = ContArgTy::get(Fn, &Arg); + + // Promote the pointer type to its value type if the bit in `PromotionMask` + // is set. 
+ if (PromotionMask[ArgNo]) { + assert(ArgTy.isPointerTy()); + ArgTys.push_back(ArgTy.getPointerElementType()); + ParamAttrs.push_back({}); + continue; + } + ArgTys.push_back(ArgTy); + ParamAttrs.push_back(FnAttrs.getParamAttrs(ArgNo)); + } + + ContFuncTy NewFuncTy(ContFuncTy::get(Fn).ReturnTy, ArgTys); + auto NewFnAttr = AttributeList::get(Fn->getContext(), FnAttrs.getFnAttrs(), + FnAttrs.getRetAttrs(), ParamAttrs); + auto *NewFn = cloneFunctionWithTypes(Fn, NewFuncTy, NewFnAttr); + + IRBuilder<> B(Fn->getContext()); + // Change argument types at call sites. + llvm::forEachCall(*NewFn, [&](CallInst &Call) { + B.SetInsertPoint(&Call); + for (const auto &[ArgNo, ArgPair] : + llvm::enumerate(llvm::zip(Call.args(), NewFn->args()))) { + auto &CallArg = std::get<0>(ArgPair); + auto &NewArg = std::get<1>(ArgPair); + if (CallArg->getType() != NewArg.getType()) { + auto *NewOp = B.CreateLoad(NewArg.getType(), CallArg); + Call.setArgOperand(ArgNo, NewOp); + } + } + // Update Callee function type. + Call.setCalledFunction(NewFn); + }); + + // Replace argument uses. + for (const auto &[OldArg, NewArg] : llvm::zip(Fn->args(), NewFn->args())) { + Value *NewValue = &NewArg; + NewArg.setName(OldArg.getName()); + if (!NewFn->isDeclaration()) { + if (NewArg.getType() != OldArg.getType()) { + B.SetInsertPointPastAllocas(NewFn); + auto *ArgAlloca = B.CreateAlloca(NewArg.getType()); + B.CreateStore(&NewArg, ArgAlloca); + NewValue = ArgAlloca; + } + OldArg.replaceAllUsesWith(NewValue); + } + } + Fn->eraseFromParent(); + return NewFn; +} + +/// Unpack the return (struct) type of the input function, which means change +/// the return type to its first element type. This may generate invalid IR in +/// general, call this with extra caution. 
+Function *llvm::unpackStructReturnType(Function *Fn) { + auto *RetTy = Fn->getReturnType(); + assert(RetTy->isStructTy()); + auto *NewRetTy = RetTy->getStructElementType(0); + + ContFuncTy NewFnTy(NewRetTy, ContFuncTy::get(Fn).ArgTys); + auto *NewFn = cloneFunctionWithTypes(Fn, NewFnTy, Fn->getAttributes()); + llvm::forEachCall(*NewFn, [&](CallInst &Call) { + // Update callee function type. + Call.setCalledFunction(NewFn); + }); + + // Copy argument names and replace argument uses. + for (const auto &[OldArg, NewArg] : llvm::zip(Fn->args(), NewFn->args())) { + NewArg.setName(OldArg.getName()); + if (!NewFn->isDeclaration()) + OldArg.replaceAllUsesWith(&NewArg); + } + IRBuilder<> B(Fn->getContext()); + llvm::forEachTerminator( + NewFn, {Instruction::Ret}, [&](Instruction &Terminator) { + B.SetInsertPoint(&Terminator); + Value *RetExtractVal = + B.CreateExtractValue(Terminator.getOperand(0), {0}); + B.CreateRet(RetExtractVal); + Terminator.eraseFromParent(); + }); + Fn->eraseFromParent(); + return NewFn; +} + +// Turn `StructRet` argument into more canonical return statement. 
+Function *llvm::lowerStructRetArgument(Function *Fn) { + assert(Fn->getReturnType()->isVoidTy()); + auto *RetArg = Fn->getArg(0); + if (!RetArg->hasStructRetAttr()) + RetArg = Fn->getArg(1); + assert(RetArg->hasStructRetAttr()); + unsigned RetArgIdx = RetArg->getArgNo(); + Type *RetTy = RetArg->getParamStructRetType(); + + AttributeList FnAttrs = Fn->getAttributes(); + SmallVector ArgAttrs; + SmallVector NewArgTys; + const SmallVector &OldArgTys = ContFuncTy::get(Fn).ArgTys; + for (unsigned Idx = 0; Idx < Fn->arg_size(); Idx++) { + if (Idx != RetArgIdx) { + ArgAttrs.push_back(FnAttrs.getParamAttrs(Idx)); + NewArgTys.push_back(OldArgTys[Idx]); + } + } + + ContFuncTy NewFnTy(RetTy, NewArgTys); + auto NewFnAttr = AttributeList::get(Fn->getContext(), FnAttrs.getFnAttrs(), + FnAttrs.getRetAttrs(), ArgAttrs); + Function *NewFn = cloneFunctionWithTypes(Fn, NewFnTy, NewFnAttr); + + IRBuilder<> B(Fn->getContext()); + llvm::forEachCall(*NewFn, [&](CallInst &Call) { + B.SetInsertPoint(&Call); + Value *StructRetArg = nullptr; + SmallVector Args; + for (const auto &[Idx, Arg] : llvm::enumerate(Call.args())) { + if (Idx == RetArgIdx) { + StructRetArg = Arg; + continue; + } + Args.push_back(Arg); + } + auto *NewRet = B.CreateCall(NewFn, Args); + B.CreateStore(NewRet, StructRetArg); + Call.eraseFromParent(); + }); + + // Copy argument names and replace argument uses. + for (const auto &[ArgNo, NewArg] : llvm::enumerate(NewFn->args())) { + auto *OldArg = Fn->getArg(ArgNo >= RetArgIdx ? 
ArgNo + 1 : ArgNo); + NewArg.setName(OldArg->getName()); + if (!NewFn->isDeclaration()) + OldArg->replaceAllUsesWith(&NewArg); + } + + if (!NewFn->isDeclaration()) { + B.SetInsertPointPastAllocas(NewFn); + auto *RetAlloca = B.CreateAlloca(RetTy); + RetArg->replaceAllUsesWith(RetAlloca); + + // Replace returns with return value + llvm::forEachTerminator(NewFn, {Instruction::Ret}, + [&](Instruction &Terminator) { + B.SetInsertPoint(&Terminator); + Value *RetLoad = B.CreateLoad(RetTy, RetAlloca); + B.CreateRet(RetLoad); + Terminator.eraseFromParent(); + }); + } + Fn->eraseFromParent(); + return NewFn; +} + namespace llvm { void addLgcContinuationTransform(ModulePassManager &MPM) { MPM.addPass(AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/false)); diff --git a/shared/continuations/lib/ContinuationsDialect.cpp b/llvmraytracing/lib/ContinuationsDialect.cpp similarity index 97% rename from shared/continuations/lib/ContinuationsDialect.cpp rename to llvmraytracing/lib/ContinuationsDialect.cpp index 1d56d90af9..f18a3243c2 100644 --- a/shared/continuations/lib/ContinuationsDialect.cpp +++ b/llvmraytracing/lib/ContinuationsDialect.cpp @@ -25,7 +25,7 @@ //===- ContinuationsDialect.cpp - Dialect implementation ------------------===// -#include "continuations/ContinuationsDialect.h" +#include "llvmraytracing/ContinuationsDialect.h" #define GET_INCLUDES #define GET_DIALECT_DEFS diff --git a/shared/continuations/lib/CpsStackLowering.cpp b/llvmraytracing/lib/CpsStackLowering.cpp similarity index 89% rename from shared/continuations/lib/CpsStackLowering.cpp rename to llvmraytracing/lib/CpsStackLowering.cpp index ecb5b4bac7..921cd2fd12 100644 --- a/shared/continuations/lib/CpsStackLowering.cpp +++ b/llvmraytracing/lib/CpsStackLowering.cpp @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2020-2024 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -23,10 +23,10 @@ * **********************************************************************************************************************/ -#include "continuations/CpsStackLowering.h" -#include "continuations/ContinuationsUtil.h" +#include "llvmraytracing/CpsStackLowering.h" #include "lgc/LgcCpsDialect.h" #include "llvm-dialects/Dialect/Visitor.h" +#include "llvmraytracing/ContinuationsUtil.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Type.h" @@ -121,8 +121,8 @@ void CpsStackLowering::visitGetElementPtr(GetElementPtrInst &GEP) { Value *ScaledVal = Index; if (Scaling.getSExtValue() != 1) - ScaledVal = - Builder.CreateMul(Index, Builder.getInt32(Scaling.getSExtValue())); + ScaledVal = Builder.CreateMul(ScaledVal, + Builder.getInt32(Scaling.getSExtValue())); AddChain = Builder.CreateAdd(AddChain, ScaledVal); } @@ -143,6 +143,9 @@ void CpsStackLowering::visitLoad(LoadInst &Load) { IRBuilder<> Builder(&Load); Values[0] = getRealMemoryAddress(Builder, Values[0]); + Values[0] = Builder.CreateBitCast( + Values[0], Load.getType()->getPointerTo(getLoweredCpsStackAddrSpace())); + Load.replaceUsesOfWith(Load.getPointerOperand(), Values[0]); } @@ -159,6 +162,10 @@ void CpsStackLowering::visitStore(llvm::StoreInst &Store) { IRBuilder<> Builder(&Store); Values[0] = getRealMemoryAddress(Builder, Values[0]); + Values[0] = Builder.CreateBitCast( + Values[0], Store.getValueOperand()->getType()->getPointerTo( + getLoweredCpsStackAddrSpace())); + Store.replaceUsesOfWith(Store.getPointerOperand(), Values[0]); } @@ -259,9 +266,11 @@ void CpsStackLowering::visitCpsPeek(lgc::cps::PeekOp &PeekOp) { auto *Ptr = Builder.CreateLoad(CpsStackAlloca->getAllocatedType(), CpsStackAlloca); auto *Size = PeekOp.getSize(); + int 
ImmSize = cast(Size)->getSExtValue(); assert(ImmSize >= 0); ImmSize = alignTo(ImmSize, ContinuationStackAlignment); + // Assuming continuation stack grows upward. auto *Result = Builder.CreateAdd(Ptr, Builder.getInt32(-ImmSize)); @@ -299,15 +308,28 @@ void CpsStackLowering::visitGetVsp(lgc::cps::GetVspOp &GetVsp) { // or a base pointer injected by calling @setRealBasePointer. // // @param Builder: the builder to use. -// @param Val: The offset to the base address, given as integer with bitwidth +// @param Offset: The offset to the base address, given as integer with bitwidth // <= 32. // Value *CpsStackLowering::getRealMemoryAddress(IRBuilder<> &Builder, - Value *Val) { + Value *Offset) { // Since we are using at most 32-bit offsets, assert that we don't put in any // offset larger 32 bit. - assert(Val->getType()->isIntegerTy() && - Val->getType()->getIntegerBitWidth() <= 32); - return Builder.CreateGEP(Type::getInt8Ty(Builder.getContext()), BasePointer, - {Val}); + assert(Offset->getType()->isIntegerTy() && + Offset->getType()->getIntegerBitWidth() <= 32); + + // Create a byte-addressed GEP the global memory address + offset or just the + // offset. Note: Don't currently return a inttoptr because the translator + // doesn't cope well with addrspace(21) inttoptr instructions. 
+ Value *GepBase = BasePointer; + Value *GepIndex = Offset; + + Type *I8 = Builder.getInt8Ty(); + if (isa(BasePointer)) { + GepBase = Builder.CreateIntToPtr( + Offset, I8->getPointerTo(getLoweredCpsStackAddrSpace())); + GepIndex = Builder.getInt32(0); + } + + return Builder.CreateGEP(I8, GepBase, {GepIndex}); } diff --git a/llvmraytracing/lib/DXILContIntrinsicPrepare.cpp b/llvmraytracing/lib/DXILContIntrinsicPrepare.cpp new file mode 100644 index 0000000000..d58d6711b4 --- /dev/null +++ b/llvmraytracing/lib/DXILContIntrinsicPrepare.cpp @@ -0,0 +1,210 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ + +//===- DXILContIntrinsicPrepare.cpp - Change signature of functions -------===// +// +// A pass that prepares driver implemented functions for later use. +// +// This pass unmangles function names and changes sret arguments back to +// return values. +// +//===----------------------------------------------------------------------===// + +#include "lgc/LgcRtDialect.h" +#include "llvmraytracing/Continuations.h" +#include "llvmraytracing/ContinuationsUtil.h" +#include "llvm/ADT/SmallBitVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/InitializePasses.h" +#include +#include + +using namespace llvm; + +#define DEBUG_TYPE "dxil-cont-intrinsic-prepare" + +DXILContIntrinsicPreparePass::DXILContIntrinsicPreparePass() {} + +/// - Unmangle the function names to be more readable and to prevent confusion +/// with app defined functions later. +/// - Convert sret arguments back to return values +/// - Convert struct pointer arguments to pass structs by value +static Function *transformFunction(Function &F) { + { + // Local scope for Name which is invalidated at the end. 
+ auto Name = F.getName(); + LLVM_DEBUG(dbgs() << "Transforming function " << Name << "\n"); + std::string NewName = Name.str(); + + // Unmangle declarations because they cannot be renamed in the dx api + if (Name.contains('@')) { + // Extract unmangled name + auto Start = Name.find('?') + 1; + auto End = Name.find('@', Start); + if (Start == 0 || End == StringRef::npos || Start > Name.size() || + End > Name.size()) { + report_fatal_error( + Twine( + "Failed to unmangle function name: Failed to extract from '") + + Name + "' (start: " + Twine(Start) + ", end: " + Twine(End) + ")"); + } + + // Copy name, otherwise it will be deleted before it's set + NewName = Name.substr(Start, End - Start).str(); + } + + LLVM_DEBUG(dbgs() << " Set new name " << NewName << "\n"); + + if (NewName == ContDriverFunc::TraversalName) + lgc::rt::setLgcRtShaderStage(&F, + lgc::rt::RayTracingShaderStage::Traversal); + else if (NewName == ContDriverFunc::KernelEntryName) + lgc::rt::setLgcRtShaderStage(&F, + lgc::rt::RayTracingShaderStage::KernelEntry); + F.setName(NewName); + } + + // Unpack the inner type of @class.matrix types + Type *NewRetTy = F.getReturnType(); + Function *NewFn = &F; + if (NewRetTy->isStructTy() && NewRetTy->getStructNumElements() == 1) { + if (F.getName().contains("ObjectToWorld4x3") || + F.getName().contains("WorldToObject4x3")) { + NewFn = unpackStructReturnType(NewFn); + } + } + + // Lower `StructRet` argument. 
+ if (NewFn->hasStructRetAttr()) + NewFn = lowerStructRetArgument(NewFn); + + SmallBitVector PromotionMask(NewFn->arg_size()); + + StringRef NameStr = NewFn->getName(); + for (unsigned ArgNo = 0; ArgNo < NewFn->arg_size(); ArgNo++) { + auto *Arg = NewFn->getArg(ArgNo); + ContArgTy ArgTy = ContArgTy::get(NewFn, Arg); + if (!ArgTy.isPointerTy()) + continue; + + if ((NameStr.contains("Await") || NameStr.contains("Enqueue") || + NameStr.contains("Traversal") || + (NameStr == ContDriverFunc::SetTriangleHitAttributesName && + ArgNo != 0))) + PromotionMask.set(ArgNo); + } + // Promote pointer arguments to their pointee value types. + NewFn = promotePointerArguments(NewFn, PromotionMask); + + NewFn->addFnAttr(Attribute::AlwaysInline); + // Set external linkage, so the functions don't get removed, even if they are + // never referenced at this point + NewFn->setLinkage(GlobalValue::LinkageTypes::ExternalLinkage); + return NewFn; +} + +static bool isGpuRtFuncName(StringRef Name) { + for (const auto &Intr : LgcRtGpuRtMap) { + if (Name.contains(Intr.second.Name)) + return true; + } + + return false; +} + +static bool isUtilFunction(StringRef Name) { + static const char *UtilNames[] = { + "AcceptHit", + "Await", + "Complete", + "ContinuationStackIsGlobal", + "ContStack", + "Enqueue", // To detect the mangled name of a declaration + "GetI32", + "GetCandidateState", + "GetCommittedState", + "GetContinuationStackAddr", + "GetContinuationStackGlobalMemBase", + "GetCurrentFuncAddr", + "GetFuncAddr", + "GetLocalRootIndex", + "GetResumePointAddr", + "GetRtip", + "GetShaderKind", + "GetTriangleHitAttributes", + "GetUninitialized", + "I32Count", + "IsEndSearch", + "KernelEntry", + "ReportHit", + "RestoreSystemData", + "SetI32", + "SetTriangleHitAttributes", + "SetupRayGen", + "TraceRay", + "Traversal", + }; + + for (const char *UtilName : UtilNames) { + if (Name.contains(UtilName)) + return true; + } + + return false; +} + +llvm::PreservedAnalyses DXILContIntrinsicPreparePass::run( + 
llvm::Module &M, llvm::ModuleAnalysisManager &AnalysisManager) { + LLVM_DEBUG(dbgs() << "Run the dxil-cont-intrinsic-prepare pass\n"); + + SmallVector Funcs(make_pointer_range(M.functions())); + + for (auto *F : Funcs) { + auto Name = F->getName(); + bool ShouldTransform = false; + + if (Name.contains("_cont_")) { + if (isGpuRtFuncName(Name)) + ShouldTransform = true; + else if (isUtilFunction(Name)) + ShouldTransform = true; + } else if (Name.contains("_Amd") && isUtilFunction(Name)) { + ShouldTransform = true; + } + + if (ShouldTransform) + transformFunction(*F); + } + + fixupDxilMetadata(M); + + earlyDriverTransform(M); + + return PreservedAnalyses::none(); +} diff --git a/shared/continuations/lib/DXILContLgcRtOpConverter.cpp b/llvmraytracing/lib/DXILContLgcRtOpConverter.cpp similarity index 98% rename from shared/continuations/lib/DXILContLgcRtOpConverter.cpp rename to llvmraytracing/lib/DXILContLgcRtOpConverter.cpp index 75d246066b..d5a9cc97ed 100644 --- a/shared/continuations/lib/DXILContLgcRtOpConverter.cpp +++ b/llvmraytracing/lib/DXILContLgcRtOpConverter.cpp @@ -29,9 +29,9 @@ // //===----------------------------------------------------------------------===// -#include "continuations/Continuations.h" -#include "continuations/ContinuationsUtil.h" #include "lgc/LgcRtDialect.h" +#include "llvmraytracing/Continuations.h" +#include "llvmraytracing/ContinuationsUtil.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Instructions.h" @@ -444,10 +444,7 @@ void DXILContLgcRtOpConverterPass::addDXILPayloadTypeToCall(Function &DXILFunc, ContArgTy::get(&DXILFunc, PayloadPtr).getPointerElementType(); // Store a poison value as metadata with the given type. 
- CI.setMetadata( - ContHelper::MDContPayloadTyName, - MDNode::get(CI.getContext(), - {ConstantAsMetadata::get(PoisonValue::get(PayloadPtrTy))})); + ContHelper::setPayloadTypeMetadata(&CI, PayloadPtrTy); } bool DXILContLgcRtOpConverterPass::convertDxOp(Function &Func) { diff --git a/shared/continuations/lib/DXILContPostProcess.cpp b/llvmraytracing/lib/DXILContPostProcess.cpp similarity index 62% rename from shared/continuations/lib/DXILContPostProcess.cpp rename to llvmraytracing/lib/DXILContPostProcess.cpp index 30bb282404..cc972ef0ab 100644 --- a/shared/continuations/lib/DXILContPostProcess.cpp +++ b/llvmraytracing/lib/DXILContPostProcess.cpp @@ -37,23 +37,31 @@ //===----------------------------------------------------------------------===// #include "compilerutils/CompilerUtils.h" -#include "continuations/Continuations.h" -#include "continuations/ContinuationsDialect.h" -#include "continuations/ContinuationsUtil.h" +#include "lgc/LgcCpsDialect.h" +#include "lgc/LgcRtDialect.h" +#include "llvm-dialects/Dialect/Builder.h" #include "llvm-dialects/Dialect/Visitor.h" +#include "llvmraytracing/Continuations.h" +#include "llvmraytracing/ContinuationsDialect.h" +#include "llvmraytracing/ContinuationsUtil.h" +#include "llvmraytracing/CpsStackLowering.h" +#include "llvmraytracing/GpurtContext.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" -#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/Type.h" #include "llvm/InitializePasses.h" +#include "llvm/Support/Error.h" #include using namespace llvm; @@ -85,8 +93,8 @@ class DXILContPostProcessPassImpl final { 
DXILContPostProcessPassImpl(Module &M, Module &GpurtLibrary); bool run(llvm::ModuleAnalysisManager &AnalysisManager); - static constexpr unsigned SystemDataArgumentIndexStart = 2; - static constexpr unsigned SystemDataArgumentIndexContinuation = 1; + static constexpr unsigned SystemDataArgumentIndexStartWithoutCsp = 1; + static constexpr unsigned SystemDataArgumentIndexContinuationWithoutCsp = 0; static constexpr unsigned SystemDataArgumentIndexRayGen = 0; struct FunctionData { @@ -98,14 +106,16 @@ class DXILContPostProcessPassImpl final { bool IsStart = true; Type *SystemDataTy = nullptr; unsigned SystemDataArgumentIndex = std::numeric_limits::max(); + + Value *CspStorage = nullptr; + Value *CspInitializerArg = nullptr; + Value *BasePointer = nullptr; }; private: + bool addIncomingCsp(); void lowerGetResumePointAddr(Function &F); - void handleInitialContinuationStackPtr(Function &F); - void handleLgcRtIntrinsic(Function &F); - void handleRegisterBufferSetPointerBarrier(Function &F, - GlobalVariable *Payload); + bool passOutgoingCsp(); void handleRegisterBufferGetPointer(Function &F, GlobalVariable *Payload); void handleValueI32Count(Function &F); void handleValueGetI32(Function &F); @@ -114,38 +124,46 @@ class DXILContPostProcessPassImpl final { void handleContPayloadRegisterI32Count(Function &F); void handleContPayloadRegistersGetI32(Function &F); void handleContPayloadRegistersSetI32(Function &F); - void handleContStackAlloc(FunctionAnalysisManager &FAM, Function &F); + void handleContStackIntrinsic(FunctionAnalysisManager &FAM, Function &F); + void initializeProcessableFunctionData(); + bool handleRegisterBufferCalls(); bool replaceIntrinsicCalls(Function &F, const FunctionData &Data); [[nodiscard]] std::pair insertSetupRayGen(Function &F, const FunctionData &Data); - - void collectProcessableFunctions(); - bool handleIntrinsicCalls(); + bool handleIntrinsicCalls(llvm::ModuleAnalysisManager &AnalysisManager); bool replaceIntrinsicCallsAndSetupRayGen(); + bool 
lowerCpsOps(); bool unfoldGlobals(); - bool handleAmdInternals(llvm::ModuleAnalysisManager &AnalysisManager); + bool handleAmdInternals(); Module *Mod; Module *GpurtLibrary; GlobalVariable *Registers; MapVector ToProcess; Function *SetupRayGen; - IRBuilder<> Builder; + llvm_dialects::Builder Builder; + std::optional StackAddrspace; + std::optional StackLowering; CompilerUtils::CrossModuleInliner CrossInliner; + + // For performance reasons, we keep this list of continuation.{wait}Continue + // calls here and update it when required. + SmallVector ContinueCalls; + Function *GlobalMemBase = nullptr; }; // Collects all calls to continuation.[wait]continue -static void collectContinueCalls(const Module &M, - SmallVectorImpl &CallInsts) { +static void collectContinueCalls(const Module &Mod, + SmallVectorImpl &ContinueCalls) { for (const auto &Name : {"continuation.continue", "continuation.waitContinue"}) { - auto *Func = M.getFunction(Name); + auto *Func = Mod.getFunction(Name); if (!Func) continue; - llvm::forEachCall(*Func, - [&](CallInst &CInst) { CallInsts.push_back(&CInst); }); + llvm::forEachCall( + *Func, [&](CallInst &CInst) { ContinueCalls.push_back(&CInst); }); } } @@ -164,6 +182,7 @@ static void reportContStateSizes(Module &M) { EntriesWithContinuationFunctions.insert(EntryF); } } + for (auto &F : M) { auto Stage = lgc::rt::getLgcRtShaderStage(&F); if (!Stage || F.isDeclaration()) @@ -185,12 +204,10 @@ static void reportContStateSizes(Module &M) { // For every function with incoming or outgoing (or both) payload registers, // report the incoming size and the max outgoing size in bytes. 
-static void reportPayloadSizes(Module &M) { +static void reportPayloadSizes(Module &M, ArrayRef ContinueCalls) { // For every function with continue calls, determine the max number of // outgoing registers DenseMap MaxOutgoingRegisterCounts; - SmallVector ContinueCalls; - collectContinueCalls(M, ContinueCalls); for (auto *CallInst : ContinueCalls) { auto RegCount = ContHelper::tryGetOutgoingRegisterCount(CallInst).value(); @@ -293,7 +310,9 @@ static bool addGetAddrAndMDIntrinsicCalls(Module &M) { for (auto &F : M.functions()) { // Speed-up: Skip F if it cannot be used as pointer, e.g. dx intrinsics. - if (!canBeUsedAsPtr(F)) + // Skip CPS functions here as well since they use + // lgc.cps.as.continuation.reference instead of getAddrAndMD. + if (!canBeUsedAsPtr(F) || lgc::cps::isCpsFunction(F)) continue; CEWorkList.clear(); @@ -364,11 +383,11 @@ static bool addGetAddrAndMDIntrinsicCalls(Module &M) { /// Checks some properties guaranteed for a module containing continuations /// as expected by the backend. -[[maybe_unused]] static void checkContinuationsModule(const Module &M) { +[[maybe_unused]] static void +checkContinuationsModule(const Module &M, + const SmallVectorImpl &ContinueCalls) { // Check that all continuation.continue calls have registercount metadata. - SmallVector CallInsts; - collectContinueCalls(M, CallInsts); - for (auto *CallInst : CallInsts) { + for (auto *CallInst : ContinueCalls) { if (!ContHelper::tryGetOutgoingRegisterCount(CallInst)) report_fatal_error("Missing registercount metadata on continue call!"); } @@ -407,71 +426,135 @@ static void replaceGlobal(const DataLayout &DL, GlobalVariable *Registers, G->eraseFromParent(); } -/// Look for the continue call that follows the call to GetResumePointAddr. -/// Due to saving the payload before, many basic blocks may have been inserted, -/// traverse them while making sure that this GetResumePointAddr is the only -/// possible predecessor. 
-static std::optional findContinueCall(CallInst *GetResPointAddr) { - SmallDenseSet Visited; - SmallDenseSet UnknownPreds; - SmallVector WorkList; - CallInst *Candidate = nullptr; - Visited.insert(GetResPointAddr->getParent()); - WorkList.push_back(GetResPointAddr->getParent()); - - while (!WorkList.empty()) { - auto *BB = WorkList.pop_back_val(); - // Check predecessors - if (BB != GetResPointAddr->getParent()) { - for (auto *Pred : predecessors(BB)) { - if (!Visited.contains(Pred)) - UnknownPreds.insert(Pred); - } +bool DXILContPostProcessPassImpl::addIncomingCsp() { + SmallVector Candidates; + + for (Function &Func : *Mod) { + if (Func.isDeclaration()) + continue; + + if (Func.hasMetadata(ContHelper::MDContinuationName)) { + Candidates.push_back(&Func); + continue; } - auto *Terminator = BB->getTerminator(); - if (isa_and_nonnull(Terminator)) { - auto Before = --Terminator->getIterator(); - if (auto *ContinueCall = dyn_cast(Before)) { - if (Candidate != nullptr) { - LLVM_DEBUG(dbgs() << "Found multiple continue candidates after a " - "GetResumePointAddr:\n"; - Candidate->dump(); ContinueCall->dump()); - return {}; - } - Candidate = ContinueCall; - } else { - LLVM_DEBUG(dbgs() << "The BB must end in a (continue) call after a " - "GetResumePointAddr, but " - << BB->getName() << " doesn't"); - return {}; + if (lgc::cps::isCpsFunction(Func)) { + Candidates.push_back(&Func); + continue; + } + } + + SmallVector> MappedFuncs; + for (auto &F : Candidates) { + Function *Func = F; + + Value *Initializer = nullptr; + Builder.SetInsertPointPastAllocas(Func); + + Value *Csp = Builder.CreateAlloca(Builder.getInt32Ty()); + Csp->setName("csp"); + + // Do an early lookup to avoid cluttering the code with conditional lookups. + // This will only be abandoned if F is cloned. + // Store a pointer to the function data for convenience reasons. + auto FuncIt = ToProcess.find(F); + FunctionData *FuncData = + FuncIt != ToProcess.end() ? 
&FuncIt->second : nullptr; + + if (!ContHelper::isLegacyEntryFunction(F)) { + auto *FTy = F->getFunctionType(); + SmallVector NewArgTys{FTy->params()}; + + const size_t CspArgIndex = lgc::cps::isCpsFunction(*F) ? 1 : 0; + NewArgTys.insert(NewArgTys.begin() + CspArgIndex, Builder.getInt32Ty()); + + Function *NewFunc = CompilerUtils::mutateFunctionArguments( + *Func, Func->getReturnType(), NewArgTys, Func->getAttributes()); + + Argument *CspArg = NewFunc->getArg(CspArgIndex); + CspArg->setName("cspInit"); + Initializer = CspArg; + + MappedFuncs.push_back({Func, NewFunc}); + + for (unsigned Idx = 0; Idx < Func->arg_size(); ++Idx) { + // Skip the CSP argument during remapping. + Value *OldArg = Func->getArg(Idx); + Value *NewArg = NewFunc->getArg(Idx >= CspArgIndex ? Idx + 1 : Idx); + NewArg->takeName(OldArg); + OldArg->replaceAllUsesWith(NewArg); } + + // Finally, update the function pointer so we operate on the newly created + // function. + Func = NewFunc; + + // Do some bookkeeping to avoid issues with iterator invalidation possibly + // caused by inserting NewFunc into ToProcess. + + // If the function data exists, insert a new element, try to move the + // contents and return a pointer to the new space. If this invalidates the + // iterator, a new iterator is returned. Otherwise, just return a pointer + // to the possibly newly allocated storage. 
+ if (FuncData) + FuncData = + &ToProcess.insert({NewFunc, std::move(*FuncData)}).first->second; + else + FuncData = &ToProcess.insert({NewFunc, {}}).first->second; + + FuncData->CspInitializerArg = CspArg; + } else { + // Init csp through intrinsic + auto *InitFun = GpurtLibrary->getFunction( + ContDriverFunc::GetContinuationStackAddrName); + assert(InitFun && "DXILContPostProcessPassImpl::addIncomingCsp: " + "_cont_GetContinuationStackAddr not found."); + assert(InitFun->arg_size() == 0 && + InitFun->getReturnType()->isIntegerTy(32)); + + Initializer = CrossInliner.inlineCall(Builder, InitFun).returnValue; } - for (auto *Succ : successors(BB)) { - if (Visited.contains(Succ)) - continue; - Visited.insert(Succ); - UnknownPreds.erase(Succ); - WorkList.push_back(Succ); + Builder.CreateStore(Initializer, Csp); + + assert(FuncData && "DXILContPostProcessPassImpl::addIncomingCsp: Expected " + "FuncData to point to existing storage!"); + FuncData->CspStorage = Csp; + + // Store the global memory base address. + if (StackAddrspace == ContStackAddrspace::Global) { + assert(GlobalMemBase && "DXILContPostProcessPassImpl::addIncomingCsp: " + "GlobalMemBase cannot be nullptr!"); + + auto *Base = CrossInliner.inlineCall(Builder, GlobalMemBase).returnValue; + auto *CspTy = Builder.getInt8Ty()->getPointerTo( + StackLowering->getLoweredCpsStackAddrSpace()); + FuncData->BasePointer = Builder.CreateIntToPtr(Base, CspTy); } } - if (Candidate == nullptr) { - LLVM_DEBUG( - dbgs() << "Did not find a continue call after a GetResumePointAddr\n"); - return {}; - } + // Replace references to the old function with references to the new (mapped) + // function. 
+ while (!MappedFuncs.empty()) { + auto [OldFunc, NewFunc] = MappedFuncs.pop_back_val(); + for (User *U : make_early_inc_range(OldFunc->users())) { + if (auto *AsCRUser = dyn_cast(U)) { + Builder.SetInsertPoint(AsCRUser); + auto *NewAsCROp = + Builder.create(NewFunc); + U->replaceAllUsesWith(NewAsCROp); + AsCRUser->eraseFromParent(); + } + } + + OldFunc->replaceAllUsesWith( + ConstantExpr::getBitCast(NewFunc, OldFunc->getType())); + OldFunc->eraseFromParent(); - if (!UnknownPreds.empty()) { - LLVM_DEBUG(dbgs() << "Found more than one predecessor for the continue " - "call after a GetResumePointAddr:\n"; - for (auto *Pred - : UnknownPreds) Pred->dump();); - return {}; + ToProcess.erase(OldFunc); } - return Candidate; + return !Candidates.empty(); } void DXILContPostProcessPassImpl::lowerGetResumePointAddr(Function &F) { @@ -508,24 +591,38 @@ void DXILContPostProcessPassImpl::lowerGetResumePointAddr(Function &F) { // Instead of passing the resume address to the next continue call, // use it as the return value of GetResumePointAddr and remove it from // the continue arguments. - auto FoundContinueCall = findContinueCall(CInst); + auto FoundContinueCall = findDominatedContinueCall(CInst); if (!FoundContinueCall) { report_fatal_error("Did not find a continue call after a " "GetResumePointAddr"); } auto *ContinueCall = *FoundContinueCall; - auto Name = ContinueCall->getCalledFunction()->getName(); - if (Name != "continuation.continue" && Name != "continuation.waitContinue") - report_fatal_error("The BB must end in a continue call after a " - "GetResumePointAddr"); - bool HasWaitMask = Name == "continuation.waitContinue"; - unsigned ReturnAddrArgNum = HasWaitMask ? 
3 : 2; - // Move up computation of the resume address - auto *ReturnAddr = ContinueCall->getArgOperand(ReturnAddrArgNum); - assert(ReturnAddr->getType() == Builder.getInt64Ty() && - "Unexpected return addr type!"); + Value *ReturnAddr = nullptr; + bool IsCpsFunc = lgc::cps::isCpsFunction(*CInst->getFunction()); + + // Only used for non-cps functions. + unsigned ReturnAddrArgNum = 1; + + if (auto *Jump = dyn_cast(ContinueCall); Jump) { + ReturnAddr = Builder.CreateZExt(Jump->getTarget(), Builder.getInt64Ty()); + } else { + auto Name = ContinueCall->getCalledFunction()->getName(); + + if (Name != "continuation.continue" && + Name != "continuation.waitContinue") + report_fatal_error("The BB must end in a continue call after a " + "GetResumePointAddr"); + + bool HasWaitMask = Name == "continuation.waitContinue"; + ReturnAddrArgNum = HasWaitMask ? 2 : 1; + // Move up computation of the resume address + ReturnAddr = ContinueCall->getArgOperand(ReturnAddrArgNum); + + assert((ReturnAddr->getType() == Builder.getInt64Ty()) && + "Unexpected return addr type!"); + } SmallVector MoveInstrs; if (auto *I = dyn_cast(ReturnAddr)) { @@ -547,6 +644,14 @@ void DXILContPostProcessPassImpl::lowerGetResumePointAddr(Function &F) { (*I)->moveBefore(CInst); CInst->replaceAllUsesWith(ReturnAddr); + + if (IsCpsFunc) { + CInst->eraseFromParent(); + continue; + } + + // Re-create the continuation.continue call without the return address + // argument. 
SmallVector Args; for (unsigned I = 0; I < ContinueCall->arg_size(); I++) { if (I != ReturnAddrArgNum) @@ -555,60 +660,73 @@ void DXILContPostProcessPassImpl::lowerGetResumePointAddr(Function &F) { Builder.SetInsertPoint(ContinueCall); auto *NewCall = Builder.CreateCall(ContinueCall->getCalledFunction(), Args); - // Copy metadata - SmallVector> MDs; - ContinueCall->getAllMetadata(MDs); - for (auto &MD : MDs) - NewCall->setMetadata(MD.first, MD.second); + NewCall->copyMetadata(*ContinueCall); CInst->eraseFromParent(); ContinueCall->eraseFromParent(); } +} - if (!GetResumePointAddr->use_empty()) - report_fatal_error("Unknown uses of GetResumePointAddr remain!"); +// Append the CSP argument to all continuation.continue and +// continuation.waitContinue calls. +bool DXILContPostProcessPassImpl::passOutgoingCsp() { + bool Changed = false; - // Delete the declaration of the intrinsic after lowering, as future calls to - // it are invalid. - GetResumePointAddr->eraseFromParent(); -} + collectContinueCalls(*Mod, ContinueCalls); -void DXILContPostProcessPassImpl::handleInitialContinuationStackPtr( - Function &F) { - auto *InitFun = GpurtLibrary->getFunction("_cont_GetContinuationStackAddr"); - assert(InitFun && "GetContinuationStackAddr not found"); - assert(InitFun->arg_size() == 0 && InitFun->getReturnType()->isIntegerTy(32)); - llvm::forEachCall(F, [&](CallInst &CInst) { - Builder.SetInsertPoint(&CInst); - auto *Init = Builder.CreateCall(InitFun); - CInst.replaceAllUsesWith(Init); - CrossInliner.inlineCall(*Init); - Builder.SetInsertPoint(&*Builder.GetInsertPoint()); - CInst.eraseFromParent(); - }); -} + Function *ContContinueFunc = Mod->getFunction("continuation.continue"); + Function *ContWaitContinueFunc = + Mod->getFunction("continuation.waitContinue"); -void DXILContPostProcessPassImpl::handleLgcRtIntrinsic(Function &F) { - // Look for known HLSL intrinsics - llvm::forEachCall(F, [&](CallInst &CInst) { - auto Data = ToProcess.find(CInst.getFunction()); - if (Data != 
ToProcess.end()) { - auto IntrImplEntry = llvm::findIntrImplEntryByIntrinsicCall(&CInst); - if (IntrImplEntry == std::nullopt) - return; + SmallVector NewContinueCalls; + NewContinueCalls.reserve(ContinueCalls.size()); + + for (auto *CInst : make_early_inc_range(ContinueCalls)) { + Function *Parent = CInst->getFunction(); - Data->second.IntrinsicCalls.push_back(&CInst); + auto Data = ToProcess.find(Parent); + if (Data == ToProcess.end()) { + LLVM_DEBUG( + dbgs() + << "DXILContPostProcessPassImpl::passOutgoingCsp: Did not find " + "function data for function " + << Parent->getName() << "!"); + continue; } - }); -} -void DXILContPostProcessPassImpl::handleRegisterBufferSetPointerBarrier( - Function &F, GlobalVariable *Payload) { - // Remove setpointerbarrier instructions related to payload - llvm::forEachCall(F, [&](CallInst &CInst) { - if (isCastGlobal(Payload, CInst.getOperand(0))) - CInst.eraseFromParent(); - }); + Value *CspStorage = Data->second.CspStorage; + + SmallVector NewCallArgs{CInst->args()}; + Builder.SetInsertPoint(CInst); + + // If the function does not use the stack, pass-through the CSP argument. + Value *Csp = nullptr; + + if (!CspStorage) + Csp = Data->second.CspInitializerArg; + else + Csp = Builder.CreateLoad(Builder.getInt32Ty(), CspStorage); + + bool IsWaitContinue = + CInst->getCalledFunction()->getName().contains("waitContinue"); + const size_t CspInsertIndex = IsWaitContinue ? 2 : 1; + NewCallArgs.insert(NewCallArgs.begin() + CspInsertIndex, Csp); + + auto *NewCall = Builder.CreateCall( + IsWaitContinue ? ContWaitContinueFunc : ContContinueFunc, NewCallArgs); + CInst->replaceAllUsesWith(NewCall); + NewCall->copyMetadata(*CInst); + CInst->eraseFromParent(); + + NewContinueCalls.push_back(NewCall); + + Changed = true; + } + + // The list of continue calls is now final. 
+ ContinueCalls = std::move(NewContinueCalls); + + return Changed; } void DXILContPostProcessPassImpl::handleRegisterBufferGetPointer( @@ -620,13 +738,14 @@ void DXILContPostProcessPassImpl::handleRegisterBufferGetPointer( static_assert(FirstPayloadMemoryPointerRegister == 0, "Need to adjust offset here"); Builder.SetInsertPoint(&CInst); - auto *StackOffsetTy = getContinuationStackOffsetType(F.getContext()); + + Type *StackOffsetTy = CInst.getType(); + + // Load an addrspace(32) pointer from the payload global and let stack + // handling do the conversion into adds/muls and GEPs. auto *CastPayload = Builder.CreateBitOrPointerCast( Payload, StackOffsetTy->getPointerTo(Payload->getAddressSpace())); - auto *Offset = Builder.CreateLoad(StackOffsetTy, CastPayload); - auto *Ptr = continuationStackOffsetToPtr(Builder, Offset, *GpurtLibrary, - CrossInliner); - Ptr = Builder.CreateBitCast(Ptr, CInst.getType()); + Value *Ptr = Builder.CreateLoad(StackOffsetTy, CastPayload); CInst.replaceAllUsesWith(Ptr); CInst.eraseFromParent(); } @@ -659,6 +778,7 @@ void DXILContPostProcessPassImpl::handleValueGetI32(Function &F) { && F.getFunctionType()->getParamType(1)->isIntegerTy(32)); auto *I32 = Builder.getInt32Ty(); + llvm::forEachCall(F, [&](CallInst &CInst) { Builder.SetInsertPoint(&CInst); Value *Addr = @@ -693,15 +813,17 @@ void DXILContPostProcessPassImpl::handleValueSetI32(Function &F) { void DXILContPostProcessPassImpl::handleContPayloadRegisterI32Count( Function &F) { - assert(F.arg_size() == 0 + assert(F.arg_empty() // register count && F.getFunctionType()->getReturnType()->isIntegerTy(32)); - auto *RegCount = - ConstantInt::get(IntegerType::get(F.getContext(), 32), - Registers->getValueType()->getArrayNumElements()); + uint32_t RegCount = + ContHelper::tryGetMaxUsedPayloadRegisterCount(*Mod).value_or(0); + auto *RegCountAsConstant = + ConstantInt::get(IntegerType::get(F.getContext(), 32), RegCount); + llvm::forEachCall(F, [&](CallInst &CInst) { - 
CInst.replaceAllUsesWith(RegCount); + CInst.replaceAllUsesWith(RegCountAsConstant); CInst.eraseFromParent(); }); } @@ -743,56 +865,125 @@ void DXILContPostProcessPassImpl::handleContPayloadRegistersSetI32( }); } -void DXILContPostProcessPassImpl::handleContStackAlloc( +// Replace calls to _AmdContStack* with calls to lgc.cps dialect ops. +// Do some simple constant propagation on the fly. +void DXILContPostProcessPassImpl::handleContStackIntrinsic( FunctionAnalysisManager &FAM, Function &F) { - assert(F.getReturnType()->isIntegerTy(32) && - F.arg_size() == 2 - // csp - && F.getFunctionType()->getParamType(0)->isPointerTy() - // size - && F.getFunctionType()->getParamType(1)->isIntegerTy(32)); - llvm::forEachCall(F, [&](CallInst &CInst) { - Builder.SetInsertPoint(&CInst); - auto *Func = CInst.getFunction(); - Value *SizeArg = CInst.getArgOperand(1); - uint32_t Size; + // Check if the function is either of void return type or i32 return type and + // has no arguments or a single integer argument dividable by 32 (to allow + // storing and loading multiple dwords via AmdContStackLoad / + // AmdContStackStore). 
+ Type *ReturnTy = F.getReturnType(); + (void)ReturnTy; + assert( + (ReturnTy->isVoidTy() || (ReturnTy->isIntegerTy() && + (ReturnTy->getIntegerBitWidth() % 32 == 0))) && + "DXILContPostProcessPassImpl::handleContStackIntrinsic: Invalid " + "return type!"); + + Type *FuncTy = F.getFunctionType(); + (void)(FuncTy); + assert((FuncTy->getFunctionNumParams() == 0 || + FuncTy->getFunctionParamType(0)->isIntegerTy()) && + "DXILContPostProcessPassImpl::handleContStackIntrinsic: Invalid " + "argument signature!"); + + StringRef FuncName = F.getName(); + FuncName.consume_front("_AmdContStack"); + + auto ConstantFoldInstruction = [&](Function *Parent, + Value *SizeArg) -> Value * { + if (!isa(SizeArg)) + return SizeArg; if (auto *I = dyn_cast(SizeArg)) { // Do some basic constant-propagation // This is needed because this pass just replaced the ValueI32Count // and ContPayloadRegistersI32Count intrinsics and the allocated size // usually depends on these values. - auto &DT = FAM.getResult(*Func); - auto &TLI = FAM.getResult(*Func); - auto &AC = FAM.getResult(*Func); - const SimplifyQuery SQ(Func->getParent()->getDataLayout(), &TLI, &DT, + auto &DT = FAM.getResult(*Parent); + auto &TLI = FAM.getResult(*Parent); + auto &AC = FAM.getResult(*Parent); + const SimplifyQuery SQ(Parent->getParent()->getDataLayout(), &TLI, &DT, &AC); if (auto *NewSize = simplifyInstruction(I, SQ)) - SizeArg = NewSize; + return NewSize; } - if (auto *C = dyn_cast(SizeArg)) - Size = C->getZExtValue(); - else - report_fatal_error("ContStackAlloc must be called with a constant " - "that can be computed at compile time"); + return SizeArg; + }; - auto *OrigVal = - Builder.CreateLoad(Builder.getInt32Ty(), CInst.getArgOperand(0)); + llvm::forEachCall(F, [&](CallInst &CInst) { + Value *Replacement = nullptr; + Builder.SetInsertPoint(&CInst); - auto *NewVal = Builder.CreateAdd(OrigVal, Builder.getInt32(Size)); - Builder.CreateStore(NewVal, CInst.getArgOperand(0)); - CInst.replaceAllUsesWith(OrigVal); - 
CInst.eraseFromParent(); + Type *DestTy = CInst.getType(); + + bool IsMemoryAccess = false; + if (FuncName == "Alloc") { + Value *SizeArg = + ConstantFoldInstruction(CInst.getFunction(), CInst.getArgOperand(0)); + Replacement = Builder.create(SizeArg); + + if (auto *Size = dyn_cast(SizeArg)) + ContHelper::addStackSize(CInst.getFunction(), Size->getSExtValue()); + } else if (FuncName == "Free") { + Value *SizeArg = + ConstantFoldInstruction(CInst.getFunction(), CInst.getArgOperand(0)); + Replacement = Builder.create(SizeArg); + } else if (FuncName == "SetPtr") { + Value *Vsp = CInst.getArgOperand(0); + Replacement = Builder.create(Builder.CreateIntToPtr( + Vsp, + PointerType::get(Builder.getInt8Ty(), lgc::cps::stackAddrSpace))); + } else if (FuncName == "GetPtr") { + Replacement = Builder.create(); + } else if (FuncName.starts_with("Load")) { + Value *Addr = + ConstantFoldInstruction(CInst.getFunction(), CInst.getArgOperand(0)); + Value *Ptr = Builder.CreateIntToPtr( + Addr, CInst.getType()->getPointerTo(lgc::cps::stackAddrSpace)); + Replacement = Builder.CreateAlignedLoad( + DestTy, Ptr, + Align(CpsStackLowering::getContinuationStackAlignment())); + + if (FuncName.starts_with("LoadLastUse")) + ContHelper::setIsLastUseLoad(*cast(Replacement)); + + IsMemoryAccess = true; + } else if (FuncName.starts_with("Store")) { + assert(FuncTy->getFunctionNumParams() == 2 && + "DXILContPostProcessPassImpl::handleContStackIntrinsic: Invalid " + "argument signature for AmdContStackStore!"); + + Value *Addr = + ConstantFoldInstruction(CInst.getFunction(), CInst.getArgOperand(0)); + Value *Val = CInst.getArgOperand(1); + Value *Ptr = Builder.CreateIntToPtr( + Addr, Val->getType()->getPointerTo(lgc::cps::stackAddrSpace)); + Builder.CreateAlignedStore( + Val, Ptr, Align(CpsStackLowering::getContinuationStackAlignment())); + + IsMemoryAccess = true; + } else { + llvm_unreachable("DXILContPostProcessPassImpl::handleContStackIntrinsic: " + "Unknown intrinsic!"); + } + + if (Replacement) 
{ + if (!DestTy->isVoidTy() && !IsMemoryAccess) + Replacement = Builder.CreatePtrToInt(Replacement, DestTy); + + CInst.replaceAllUsesWith(Replacement); + } - // Add allocation to the stack size of this function - ContHelper::addStackSize(Func, Size); + CInst.eraseFromParent(); }); } -void DXILContPostProcessPassImpl::collectProcessableFunctions() { +void DXILContPostProcessPassImpl::initializeProcessableFunctionData() { for (Function &F : *Mod) { if (F.isDeclaration()) continue; @@ -813,13 +1004,19 @@ void DXILContPostProcessPassImpl::collectProcessableFunctions() { DXILShaderKind Kind = ShaderStageHelper::shaderStageToDxilShaderKind(*Stage); + const bool IsCpsFunction = lgc::cps::isCpsFunction(F); + switch (Kind) { case DXILShaderKind::RayGeneration: { FunctionData Data; Data.Kind = Kind; - Data.SystemDataArgumentIndex = SystemDataArgumentIndexRayGen; + + Data.SystemDataArgumentIndex = + !IsCpsFunction ? SystemDataArgumentIndexRayGen : CpsArgIdxSystemData; + Data.SystemDataTy = - F.getFunctionType()->getParamType(SystemDataArgumentIndexRayGen); + F.getFunctionType()->getParamType(Data.SystemDataArgumentIndex); + [[maybe_unused]] bool DidInsert = ToProcess.insert({&F, std::move(Data)}).second; assert(DidInsert); @@ -832,9 +1029,12 @@ void DXILContPostProcessPassImpl::collectProcessableFunctions() { case DXILShaderKind::Callable: { FunctionData Data; Data.Kind = Kind; - Data.SystemDataArgumentIndex = SystemDataArgumentIndexStart; + + Data.SystemDataArgumentIndex = + !IsCpsFunction ? 
SystemDataArgumentIndexStartWithoutCsp + : CpsArgIdxSystemData; Data.SystemDataTy = - F.getFunctionType()->getParamType(SystemDataArgumentIndexStart); + F.getFunctionType()->getParamType(Data.SystemDataArgumentIndex); [[maybe_unused]] bool DidInsert = ToProcess.insert({&F, std::move(Data)}).second; assert(DidInsert); @@ -856,9 +1056,13 @@ void DXILContPostProcessPassImpl::collectProcessableFunctions() { if (Stage && &F != EntryF) { FunctionData Data = ToProcess[EntryF]; Data.IsStart = false; - Data.SystemDataArgumentIndex = SystemDataArgumentIndexContinuation; - Data.SystemDataTy = - F.getArg(SystemDataArgumentIndexContinuation)->getType(); + + Data.SystemDataArgumentIndex = + !lgc::cps::isCpsFunction(F) + ? SystemDataArgumentIndexContinuationWithoutCsp + : CpsArgIdxSystemData; + + Data.SystemDataTy = F.getArg(Data.SystemDataArgumentIndex)->getType(); [[maybe_unused]] bool DidInsert = ToProcess.insert({&F, std::move(Data)}).second; assert(DidInsert); @@ -867,22 +1071,20 @@ void DXILContPostProcessPassImpl::collectProcessableFunctions() { } } -bool DXILContPostProcessPassImpl::handleIntrinsicCalls() { +bool DXILContPostProcessPassImpl::handleRegisterBufferCalls() { bool Changed = false; auto *Payload = Mod->getGlobalVariable(ContHelper::GlobalPayloadName); - // TODO: Dialectify. 
for (auto &F : Mod->functions()) { auto Name = F.getName(); - if (Name == "continuation.initialContinuationStackPtr") { - Changed = true; - handleInitialContinuationStackPtr(F); - } else if (Name.starts_with("lgc.rt")) { - Changed = true; - handleLgcRtIntrinsic(F); - } else if (Name.starts_with("registerbuffer.setpointerbarrier")) { - Changed = true; - handleRegisterBufferSetPointerBarrier(F, Payload); + if (Name.starts_with("registerbuffer.setpointerbarrier")) { + // Remove setpointerbarrier instructions related to payload + llvm::forEachCall(F, [&](CallInst &CInst) { + if (isCastGlobal(Payload, CInst.getOperand(0))) { + CInst.eraseFromParent(); + Changed = true; + } + }); } else if (Name.starts_with("registerbuffer.getpointer")) { Changed = true; handleRegisterBufferGetPointer(F, Payload); @@ -892,12 +1094,45 @@ bool DXILContPostProcessPassImpl::handleIntrinsicCalls() { return Changed; } +bool DXILContPostProcessPassImpl::handleIntrinsicCalls( + llvm::ModuleAnalysisManager &AnalysisManager) { + bool Changed = false; + + for (auto &F : Mod->functions()) { + auto Name = F.getName(); + if (Name.starts_with("lgc.rt")) { + // Search for known HLSL intrinsics + llvm::forEachCall(F, [&](CallInst &CInst) { + auto Data = ToProcess.find(CInst.getFunction()); + if (Data != ToProcess.end()) { + auto IntrImplEntry = llvm::findIntrImplEntryByIntrinsicCall(&CInst); + if (IntrImplEntry == std::nullopt) + return; + + Data->second.IntrinsicCalls.push_back(&CInst); + Changed = true; + } + }); + } else if (Name.contains("ContStack")) { + Changed = true; + + auto &FAM = + AnalysisManager.getResult(*Mod) + .getManager(); + + handleContStackIntrinsic(FAM, F); + } + } + + return Changed; +} + bool DXILContPostProcessPassImpl::replaceIntrinsicCalls( Function &F, const FunctionData &Data) { if (Data.IntrinsicCalls.empty()) return false; - auto *FuncTy = F.getFunctionType(); + [[maybe_unused]] auto *FuncTy = F.getFunctionType(); assert(FuncTy->getNumParams() > Data.SystemDataArgumentIndex 
&& "Missing system data argument"); @@ -925,6 +1160,7 @@ DXILContPostProcessPassImpl::insertSetupRayGen(Function &F, return {false, &F}; auto *FuncTy = F.getFunctionType(); + assert(FuncTy->getNumParams() > Data.SystemDataArgumentIndex && "Missing system data argument"); @@ -982,6 +1218,46 @@ bool DXILContPostProcessPassImpl::replaceIntrinsicCallsAndSetupRayGen() { return Changed; } +// +// Entry point for all lgc.cps lowering. +// +bool DXILContPostProcessPassImpl::lowerCpsOps() { + SmallVector CpsFuncs; + + bool Changed = false; + + for (Function &Func : *Mod) { + if (Func.isDeclaration()) + continue; + + auto FuncData = ToProcess.find(&Func); + Value *CspStorage = nullptr; + if (FuncData != ToProcess.end()) + CspStorage = FuncData->second.CspStorage; + + if (!CspStorage) { + LLVM_DEBUG(dbgs() << "DXILContPostProcessPassImpl::lowerCpsOps: Did not " + "find the CSP storage alloca for " + << Func.getName() << ".\n"); + continue; + } + + // Do the actual stack lowering. + if (*StackAddrspace == ContStackAddrspace::Global) { + // Ensure loads and stores are getting mapped to global memory (by adding + // the global memory base address). 
+ assert(FuncData->second.BasePointer && + "DXILContPostProcessPassImpl::lowerCpsOps: Requested access to " + "global memory but no base pointer provided!"); + StackLowering->setRealBasePointer(FuncData->second.BasePointer); + } + + StackLowering->lowerCpsStackOps(Func, CspStorage); + } + + return Changed; +} + bool DXILContPostProcessPassImpl::unfoldGlobals() { // Replace register globals with indices into a bigger global const auto &DL = Mod->getDataLayout(); @@ -1016,10 +1292,8 @@ bool DXILContPostProcessPassImpl::unfoldGlobals() { return false; } -bool DXILContPostProcessPassImpl::handleAmdInternals( - llvm::ModuleAnalysisManager &AnalysisManager) { +bool DXILContPostProcessPassImpl::handleAmdInternals() { bool Changed = false; - SmallVector ContStackAllocs; for (auto &F : Mod->functions()) { auto Name = F.getName(); @@ -1041,45 +1315,52 @@ bool DXILContPostProcessPassImpl::handleAmdInternals( } else if (Name.starts_with("_AmdContPayloadRegistersSetI32")) { Changed = true; handleContPayloadRegistersSetI32(F); - } else if (Name.starts_with("_AmdContStackAlloc")) { - Changed = true; - ContStackAllocs.push_back(&F); } } - if (!ContStackAllocs.empty()) { - auto &FAM = - AnalysisManager.getResult(*Mod) - .getManager(); - for (auto *F : ContStackAllocs) - handleContStackAlloc(FAM, *F); - } - return Changed; } DXILContPostProcessPassImpl::DXILContPostProcessPassImpl(Module &M, Module &GpurtLibrary) : Mod{&M}, GpurtLibrary{&GpurtLibrary}, - SetupRayGen{GpurtLibrary.getFunction("_cont_SetupRayGen")}, - Builder{Mod->getContext()} {} + SetupRayGen{GpurtLibrary.getFunction(ContDriverFunc::SetupRayGenName)}, + Builder{Mod->getContext()}, StackAddrspace{ + ContHelper::tryGetStackAddrspace(*Mod)} {} -bool DXILContPostProcessPassImpl::run( - llvm::ModuleAnalysisManager &AnalysisManager) { +bool DXILContPostProcessPassImpl::run(ModuleAnalysisManager &AnalysisManager) { bool Changed = false; - collectProcessableFunctions(); + StackLowering.emplace(Mod->getContext(), + 
static_cast(StackAddrspace.value())); + + if (*StackAddrspace == ContStackAddrspace::Global) + GlobalMemBase = getContinuationStackGlobalMemBase(*GpurtLibrary); - Changed |= handleIntrinsicCalls(); + initializeProcessableFunctionData(); + + Changed |= handleRegisterBufferCalls(); + Changed |= unfoldGlobals(); + Changed |= handleAmdInternals(); + Changed |= handleIntrinsicCalls(AnalysisManager); Changed |= replaceIntrinsicCallsAndSetupRayGen(); + Changed |= addIncomingCsp(); + for (auto &F : make_early_inc_range(*Mod)) { - if (F.getName().starts_with("_AmdGetResumePointAddr")) { + auto FuncName = F.getName(); + if (FuncName.starts_with("_AmdGetResumePointAddr")) { Changed = true; lowerGetResumePointAddr(F); + } else if (FuncName.starts_with("_AmdComplete")) { + Changed = true; + llvm::forEachCall(F, [&](llvm::CallInst &CInst) { + llvm::terminateShader(Builder, &CInst); + }); } } - Changed |= unfoldGlobals(); - Changed |= handleAmdInternals(AnalysisManager); + + Changed |= passOutgoingCsp(); + Changed |= lowerCpsOps(); Changed |= fixupDxilMetadata(*Mod); @@ -1087,14 +1368,14 @@ bool DXILContPostProcessPassImpl::run( Changed |= addGetAddrAndMDIntrinsicCalls(*Mod); #ifndef NDEBUG - checkContinuationsModule(*Mod); + checkContinuationsModule(*Mod, ContinueCalls); #endif if (ReportContStateSizes || ReportAllSizes) reportContStateSizes(*Mod); if (ReportPayloadRegisterSizes || ReportAllSizes) - reportPayloadSizes(*Mod); + reportPayloadSizes(*Mod, ContinueCalls); if (ReportSystemDataSizes || ReportAllSizes) reportSystemDataSizes(*Mod, ToProcess); @@ -1111,8 +1392,9 @@ DXILContPostProcessPass::run(llvm::Module &Module, LLVM_DEBUG(dbgs() << "Run the pass dxil-cont-post-process\n"); AnalysisManager.getResult(Module); - DXILContPostProcessPassImpl Impl{Module, - GpurtLibrary ? *GpurtLibrary : Module}; + auto &GpurtContext = lgc::GpurtContext::get(Module.getContext()); + DXILContPostProcessPassImpl Impl{ + Module, GpurtContext.theModule ? 
*GpurtContext.theModule : Module}; bool Changed = Impl.run(AnalysisManager); return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); diff --git a/shared/continuations/lib/DXILSupport.cpp b/llvmraytracing/lib/DXILSupport.cpp similarity index 98% rename from shared/continuations/lib/DXILSupport.cpp rename to llvmraytracing/lib/DXILSupport.cpp index d3c62a18a1..4ffbef44b4 100644 --- a/shared/continuations/lib/DXILSupport.cpp +++ b/llvmraytracing/lib/DXILSupport.cpp @@ -29,8 +29,8 @@ // //===----------------------------------------------------------------------===// -#include "continuations/Continuations.h" -#include "continuations/ContinuationsUtil.h" +#include "llvmraytracing/Continuations.h" +#include "llvmraytracing/ContinuationsUtil.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/iterator_range.h" diff --git a/shared/continuations/lib/GpurtContext.cpp b/llvmraytracing/lib/GpurtContext.cpp similarity index 97% rename from shared/continuations/lib/GpurtContext.cpp rename to llvmraytracing/lib/GpurtContext.cpp index c525d57cb3..fbd3d6a46f 100644 --- a/shared/continuations/lib/GpurtContext.cpp +++ b/llvmraytracing/lib/GpurtContext.cpp @@ -29,7 +29,7 @@ *********************************************************************************************************************** */ -#include "continuations/GpurtContext.h" +#include "llvmraytracing/GpurtContext.h" #include "llvm/IR/Module.h" using namespace llvm; diff --git a/shared/continuations/lib/GpurtDialect.cpp b/llvmraytracing/lib/GpurtDialect.cpp similarity index 100% rename from shared/continuations/lib/GpurtDialect.cpp rename to llvmraytracing/lib/GpurtDialect.cpp diff --git a/shared/continuations/lib/LegacyCleanupContinuations.cpp b/llvmraytracing/lib/LegacyCleanupContinuations.cpp similarity index 89% rename from shared/continuations/lib/LegacyCleanupContinuations.cpp rename to llvmraytracing/lib/LegacyCleanupContinuations.cpp index 64abd9c279..6ef4b7b2b3 
100644 --- a/shared/continuations/lib/LegacyCleanupContinuations.cpp +++ b/llvmraytracing/lib/LegacyCleanupContinuations.cpp @@ -28,7 +28,7 @@ // Convert the result from the coroutine passes to something more suitable for // the compiler backend. // -// Instead of return values, use continue, waitContinue and complete intrinsics. +// Instead of return values, use continue and waitContinue intrinsics. // Add arguments to resume functions, which are the return values of the called // continuation. // @@ -37,12 +37,15 @@ //===----------------------------------------------------------------------===// #include "compilerutils/CompilerUtils.h" -#include "continuations/Continuations.h" -#include "continuations/ContinuationsDialect.h" +#include "lgc/LgcCpsDialect.h" #include "lgc/LgcRtDialect.h" +#include "llvm-dialects/Dialect/Builder.h" +#include "llvm-dialects/Dialect/Visitor.h" +#include "llvmraytracing/Continuations.h" +#include "llvmraytracing/ContinuationsDialect.h" +#include "llvmraytracing/ContinuationsUtil.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" @@ -59,8 +62,7 @@ namespace { class LegacyCleanupContinuationsPassImpl { public: LegacyCleanupContinuationsPassImpl( - llvm::Module &Mod, llvm::Module *GpurtLibrary, - llvm::ModuleAnalysisManager &AnalysisManager); + llvm::Module &Mod, llvm::ModuleAnalysisManager &AnalysisManager); llvm::PreservedAnalyses run(); @@ -100,17 +102,15 @@ class LegacyCleanupContinuationsPassImpl { Module &M; LLVMContext &Context; llvm::FunctionAnalysisManager &FAM; - IRBuilder<> B; + llvm_dialects::Builder B; Type *I32 = nullptr; Type *I64 = nullptr; Function *ContMalloc = nullptr; Function *ContFree = nullptr; Function *Continue = nullptr; Function *WaitContinue = nullptr; - Function *Complete = nullptr; MapVector ToProcess; uint32_t MaxContStateBytes = 0; - llvm::Module *GpurtLibrary = nullptr; 
CompilerUtils::CrossModuleInliner CrossInliner; }; @@ -221,6 +221,7 @@ void LegacyCleanupContinuationsPassImpl::finalizeContinuationData( Function &StartFunc, ContinuationData &FuncData) { if (FuncData.MallocCall) return; + for (auto *F : FuncData.Functions) { bool IsStart = (F == &StartFunc); // If this is the continuation start Value *ContFrame; @@ -371,9 +372,6 @@ void LegacyCleanupContinuationsPassImpl::processContinuation( } } else { B.SetInsertPoint(&*F->getEntryBlock().getFirstNonPHIOrDbgOrAlloca()); - AllArgTypes.push_back( - getContinuationStackOffsetType(Context)); // continuation stack ptr - AllArgValues.push_back(nullptr); // Find arguments from continuation.returnvalue calls for (auto &I : F->getEntryBlock()) { @@ -507,49 +505,38 @@ void LegacyCleanupContinuationsPassImpl::handleFunctionEntry( uint64_t NeededStackSize = Data.getContStateStackBytes(); bool IsStart = F == Data.NewStart; - // We allocate continuation state on top of the payload. - // We plan to change this, but until we have done that, - // we need to "reverse peek" on top of the payload allocation - // that is going to be allocated later (also on function entry). int64_t StackOffsetForPayloadSpill = 0; if (IsStart) { // Add function metadata that stores how big the continuation state is in // bytes - ContHelper::setContinuationStateByteCount(*F, Data.ContStateBytes); - // At this point, stack size is exactly the payload spill size. - StackOffsetForPayloadSpill = ContHelper::tryGetStackSize(F).value_or(0); - if (NeededStackSize) { - // Add to continuation stack size metadata - ContHelper::addStackSize(F, NeededStackSize); - } - } else { - // Deallocate - if (NeededStackSize) - moveContinuationStackOffset(B, -NeededStackSize); + // Technically, continuation state includes the spilled payload here. + // However, we want to exclude it here for statistics. 
+ uint32_t PayloadSpillSize = ContHelper::tryGetStackSize(F).value_or(0); + assert(Data.ContStateBytes >= PayloadSpillSize); + ContHelper::setContinuationStateByteCount(*F, Data.ContStateBytes - + PayloadSpillSize); } if (NeededStackSize) { + if (IsStart) { + ContHelper::setStackSize(F, NeededStackSize); + } else { + // Deallocate + B.create(B.getInt32(NeededStackSize)); + } + + Value *ContStateOnStack = + B.create(B.getInt32(-StackOffsetForPayloadSpill)); + uint64_t ContStateNumI32s = divideCeil(Data.ContStateBytes, RegisterBytes); auto *ContStateTy = ArrayType::get(I32, ContStateNumI32s); // Peek into CSP stack to obtain continuation state. // This can be handled in the same way for start and resume functions, // because for start functions we already allocated space above. - // - // Obtain current CSP - auto *CspOffsetPtr = B.CreateCall(getContinuationStackOffset(M)); - auto *CspType = getContinuationStackOffsetType(M.getContext()); - auto *CspAsOffset = B.CreateLoad(CspType, CspOffsetPtr); - auto *CspAsPtr = continuationStackOffsetToPtr( - B, CspAsOffset, *(GpurtLibrary ? GpurtLibrary : &M), CrossInliner); - Value *ContStateOnStack = B.CreateGEP( - B.getInt8Ty(), CspAsPtr, B.getInt64(StackOffsetForPayloadSpill)); - - Data.NewContState = B.CreateBitOrPointerCast( - ContStateOnStack, - ContStateTy->getPointerTo( - ContStateOnStack->getType()->getPointerAddressSpace()), + Data.NewContState = B.CreateBitCast( + ContStateOnStack, ContStateTy->getPointerTo(lgc::cps::stackAddrSpace), "cont.state"); } } @@ -563,7 +550,7 @@ void LegacyCleanupContinuationsPassImpl::handleFunctionEntry( /// to /// %resume_addr = ptrtoint i8* ... 
@fun.resume.0 to i64 /// %foo = ptrtoint %continuation.token* () @foo to i64 -/// call void @continuation.continue(i64 %foo, i8 addrspace(21)* %csp, i64 +/// call void @continuation.continue(i64 %foo, i64 /// %resume_addr, ) !continuation.registercount !0 /// unreachable /// @@ -601,16 +588,16 @@ void LegacyCleanupContinuationsPassImpl::handleSingleContinue( ContinuationData &Data, CallInst *Call, Value *ResumeFun) { // Pass resume address as argument B.SetInsertPoint(Call); - // Allocate CSP storage - uint64_t NeededStackSize = Data.getContStateStackBytes(); - if (NeededStackSize) - moveContinuationStackOffset(B, NeededStackSize); - auto *ReturnAddrInt = B.CreatePtrToInt(ResumeFun, I64); - auto *CpsType = getContinuationStackOffsetType(Call->getContext()); - auto *CspFun = getContinuationStackOffset(*Call->getModule()); + // Allocate continuation state + uint64_t NeededStackSize = Data.getContStateStackBytes(); + if (NeededStackSize) { + auto *ContStateAlloc = + B.create(B.getInt32(NeededStackSize)); + ContStateAlloc->setName("cont.state.stack.segment"); + } - auto *Csp = B.CreateLoad(CpsType, B.CreateCall(CspFun)); + auto *ReturnAddrInt = B.CreatePtrToInt(ResumeFun, I64); bool IsWait = ContHelper::isWaitAwaitCall(*Call); Function *ContinueFunction = IsWait ? WaitContinue : Continue; @@ -621,10 +608,10 @@ void LegacyCleanupContinuationsPassImpl::handleSingleContinue( // The wait mask is the first argument after the function pointer if (IsWait) Args.push_back(*Call->arg_begin()); - Args.push_back(Csp); Args.push_back(ReturnAddrInt); Args.append(Call->arg_begin() + (IsWait ? 1 : 0), Call->arg_end()); auto *ContinueCall = B.CreateCall(ContinueFunction, Args); + // Copy metadata, except for the wait flag, which is no longer needed. 
ContinueCall->copyMetadata(*Call); if (IsWait) @@ -633,6 +620,7 @@ void LegacyCleanupContinuationsPassImpl::handleSingleContinue( "Missing registercount metadata!"); // Remove instructions at the end of the block + auto *Unreachable = B.CreateUnreachable(); for (auto &I : make_early_inc_range(reverse(*ContinueCall->getParent()))) { if (&I == Unreachable) @@ -646,9 +634,7 @@ void LegacyCleanupContinuationsPassImpl::handleSingleContinue( /// unreachable /// to /// -/// call void @continuation.restore.continuation_state() -/// call void @continuation.continue(i64 %returnaddr, i8 addrspace(21)* %csp, -/// ) +/// call void @continuation.continue(i64 %returnaddr, ) /// unreachable void LegacyCleanupContinuationsPassImpl::handleReturn(ContinuationData &Data, CallInst *ContRet) { @@ -658,32 +644,28 @@ void LegacyCleanupContinuationsPassImpl::handleReturn(ContinuationData &Data, if (IsEntry) { assert(ContRet->arg_size() == 1 && "Entry functions ignore the return value"); - B.CreateCall(Complete); + llvm::terminateShader(B, ContRet); } else { + // Create the call to continuation.continue, but with the same argument list + // as for continuation.return. The CSP is appended during + // DXILContPostProcess. 
SmallVector Args(ContRet->args()); - auto *CspType = getContinuationStackOffsetType(ContRet->getContext()); - auto *CspFun = getContinuationStackOffset(*ContRet->getModule()); - auto *Csp = B.CreateLoad(CspType, B.CreateCall(CspFun)); - Args.insert(Args.begin() + 1, Csp); - auto *ContinueCall = B.CreateCall(Continue, Args); Data.NewReturnContinues.push_back(ContinueCall); ContinueCall->copyMetadata(*ContRet); assert(ContHelper::tryGetOutgoingRegisterCount(ContinueCall) && "Missing registercount metadata!"); + ContRet->eraseFromParent(); } - - ContRet->eraseFromParent(); } LegacyCleanupContinuationsPassImpl::LegacyCleanupContinuationsPassImpl( - llvm::Module &Mod, llvm::Module *GpurtLibrary, - llvm::ModuleAnalysisManager &AnalysisManager) + llvm::Module &Mod, llvm::ModuleAnalysisManager &AnalysisManager) : M{Mod}, Context{M.getContext()}, FAM{AnalysisManager.getResult(Mod) .getManager()}, - B{Context}, GpurtLibrary{GpurtLibrary} { + B{Context} { AnalysisManager.getResult(M); ContMalloc = M.getFunction("continuation.malloc"); ContFree = M.getFunction("continuation.free"); @@ -696,7 +678,6 @@ llvm::PreservedAnalyses LegacyCleanupContinuationsPassImpl::run() { for (auto &F : M.functions()) { if (F.empty()) continue; - if (auto *MD = F.getMetadata(ContHelper::MDContinuationName)) { analyzeContinuation(F, MD); } else if (lgc::rt::getLgcRtShaderStage(&F) == @@ -724,7 +705,6 @@ llvm::PreservedAnalyses LegacyCleanupContinuationsPassImpl::run() { I64 = Type::getInt64Ty(Context); Continue = getContinuationContinue(M); WaitContinue = getContinuationWaitContinue(M); - Complete = getContinuationComplete(M); for (auto &FuncData : ToProcess) { processContinuation(FuncData.first, FuncData.second); @@ -743,6 +723,7 @@ llvm::PreservedAnalyses LegacyCleanupContinuationsPassImpl::run() { llvm::PreservedAnalyses LegacyCleanupContinuationsPass::run( llvm::Module &Mod, llvm::ModuleAnalysisManager &AnalysisManager) { LLVM_DEBUG(dbgs() << "Run the cleanup-continuations pass\n"); - 
LegacyCleanupContinuationsPassImpl Impl(Mod, GpurtLibrary, AnalysisManager); + AnalysisManager.getResult(Mod); + LegacyCleanupContinuationsPassImpl Impl(Mod, AnalysisManager); return Impl.run(); } diff --git a/shared/continuations/lib/LgcCpsDialect.cpp b/llvmraytracing/lib/LgcCpsDialect.cpp similarity index 83% rename from shared/continuations/lib/LgcCpsDialect.cpp rename to llvmraytracing/lib/LgcCpsDialect.cpp index c84e7d93ec..7d55bb321b 100644 --- a/shared/continuations/lib/LgcCpsDialect.cpp +++ b/llvmraytracing/lib/LgcCpsDialect.cpp @@ -212,3 +212,41 @@ uint8_t lgc::cps::getPotentialCpsReturnLevels(lgc::cps::CpsShaderStage stage) { return static_cast(CpsLevels.to_ulong()); } + +// ===================================================================================================================== +// Push the state passed to a lgc::cps::jump op to the stack and return the new +// continuation stack pointer. Do nothing if there is no state to push. +void lgc::cps::pushStateToCpsStack(llvm_dialects::Builder &builder, + lgc::cps::JumpOp &jumpOp) { + Value *State = jumpOp.getState(); + + Type *StateType = State->getType(); + if (StateType->isEmptyTy()) + return; + + const DataLayout &DL = jumpOp.getModule()->getDataLayout(); + builder.SetInsertPoint(&jumpOp); + + Value *NewCsp = builder.create( + builder.getInt32(static_cast(DL.getTypeStoreSize(StateType)))); + builder.CreateStore(State, NewCsp); +} + +// ===================================================================================================================== +// Load the CPS state from the CPS stack. Reduces the stack pointer by the +// corresponding state size. Returns the popped state if eligible. If nothing +// can to be popped, return nullptr. Assume that the builder has its insertion +// point set after the CSP initializer. 
+Value *lgc::cps::popStateFromCpsStack(llvm_dialects::Builder &builder, + const DataLayout &DL, Type *stateType) { + if (stateType->isEmptyTy()) + return nullptr; + + ConstantInt *StateSize = + builder.getInt32(static_cast(DL.getTypeStoreSize(stateType))); + Value *StatePtr = builder.create(StateSize); + Value *NewState = builder.CreateLoad(stateType, StatePtr); + builder.create(StateSize); + + return NewState; +} diff --git a/shared/continuations/lib/LgcRtDialect.cpp b/llvmraytracing/lib/LgcRtDialect.cpp similarity index 100% rename from shared/continuations/lib/LgcRtDialect.cpp rename to llvmraytracing/lib/LgcRtDialect.cpp diff --git a/llvmraytracing/lib/LgcRtqDialect.cpp b/llvmraytracing/lib/LgcRtqDialect.cpp new file mode 100644 index 0000000000..726fe24d27 --- /dev/null +++ b/llvmraytracing/lib/LgcRtqDialect.cpp @@ -0,0 +1,41 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ + +// Implementation of the lgc.rtq dialect definition + +#include "lgc/LgcRtqDialect.h" + +#define GET_INCLUDES +#define GET_DIALECT_DEFS +#include "LgcRtqDialect.cpp.inc" + +using namespace llvm; +using namespace lgc; + +Type *lgc::rtq::getRayQueryType(LLVMContext &C) { + return IntegerType::get(C, 127); +} + +bool lgc::rtq::isRayQueryType(Type *Ty) { return Ty->isIntegerTy(127); } diff --git a/shared/continuations/lib/LowerAwait.cpp b/llvmraytracing/lib/LowerAwait.cpp similarity index 81% rename from shared/continuations/lib/LowerAwait.cpp rename to llvmraytracing/lib/LowerAwait.cpp index e9a422b26e..667cc8f475 100644 --- a/shared/continuations/lib/LowerAwait.cpp +++ b/llvmraytracing/lib/LowerAwait.cpp @@ -35,12 +35,12 @@ //===----------------------------------------------------------------------===// #include "compilerutils/CompilerUtils.h" -#include "continuations/Continuations.h" -#include "continuations/ContinuationsDialect.h" #include "lgc/LgcCpsDialect.h" #include "llvm-dialects/Dialect/Builder.h" #include "llvm-dialects/Dialect/Dialect.h" #include "llvm-dialects/Dialect/Visitor.h" +#include "llvmraytracing/Continuations.h" +#include "llvmraytracing/ContinuationsDialect.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/IRBuilder.h" @@ -53,19 +53,6 @@ using namespace llvm; #define DEBUG_TYPE "lower-await" -Function *llvm::getContinuationContinue(Module &M) { - auto *Name = "continuation.continue"; - if (auto *F = M.getFunction(Name)) - return F; - auto &C = M.getContext(); - auto *Void = Type::getVoidTy(C); - auto *I64 = 
Type::getInt64Ty(C); - auto *FuncTy = FunctionType::get(Void, {I64}, true); - AttributeList AL = AttributeList::get(C, AttributeList::FunctionIndex, - {Attribute::NoReturn}); - return cast(M.getOrInsertFunction(Name, FuncTy, AL).getCallee()); -} - Function *llvm::getContinuationWaitContinue(Module &M) { auto *Name = "continuation.waitContinue"; if (auto *F = M.getFunction(Name)) @@ -79,17 +66,6 @@ Function *llvm::getContinuationWaitContinue(Module &M) { return cast(M.getOrInsertFunction(Name, FuncTy, AL).getCallee()); } -Function *llvm::getContinuationComplete(Module &M) { - auto *Name = "continuation.complete"; - if (auto *F = M.getFunction(Name)) - return F; - auto &C = M.getContext(); - auto *Void = Type::getVoidTy(C); - AttributeList AL = AttributeList::get(C, AttributeList::FunctionIndex, - {Attribute::NoReturn}); - return cast(M.getOrInsertFunction(Name, AL, Void).getCallee()); -} - Function *llvm::getContinuationAwait(Module &M, Type *TokenTy, StructType *RetTy) { std::string Name = "await."; @@ -104,20 +80,6 @@ Function *llvm::getContinuationAwait(Module &M, Type *TokenTy, M.getOrInsertFunction(Name, AL, RetTy, TokenTy).getCallee()); } -Function *llvm::getContinuationCspInit(Module &M) { - auto *Name = "continuation.initialContinuationStackPtr"; - if (auto *F = M.getFunction(Name)) - return F; - auto &C = M.getContext(); - AttributeList AL = - AttributeList::get(C, AttributeList::FunctionIndex, - {Attribute::NoFree, Attribute::NoRecurse, - Attribute::NoUnwind, Attribute::WillReturn}); - return cast( - M.getOrInsertFunction(Name, AL, getContinuationStackOffsetType(C)) - .getCallee()); -} - static Function *getContinuationReturn(Module &M) { auto *Name = "continuation.return"; if (auto *F = M.getFunction(Name)) @@ -134,7 +96,7 @@ LowerAwaitPass::LowerAwaitPass() {} static void processContinuations( Module &M, const MapVector> &ToProcess, - bool LowerLgcAwait) { + bool IsLgcCpsMode) { // We definitely have a call that requires continuation in this function // 
// If this is the first time we've done this for this function @@ -167,15 +129,13 @@ static void processContinuations( // Change the return type and arguments SmallVector AllArgTypes; - // Lgc.cps dialect will handle stack pointer and return address in other - // places. + // Lgc.cps dialect will handle stack pointer and return address in + // DXILContPostProcessPass. bool IsLegacyNonEntry = - !F->hasMetadata(ContHelper::MDEntryName) && !LowerLgcAwait; - // Add continuation stack pointer and passed return address. - if (IsLegacyNonEntry) { - AllArgTypes.push_back(getContinuationStackOffsetType(Context)); + !ContHelper::isLegacyEntryFunction(F) && !IsLgcCpsMode; + // Add passed return address. + if (IsLegacyNonEntry) AllArgTypes.push_back(I64); - } for (auto const &Arg : F->args()) AllArgTypes.push_back(Arg.getType()); @@ -194,13 +154,12 @@ static void processContinuations( llvm::moveFunctionBody(*F, *NewFunc); // Set arg names for new function - if (IsLegacyNonEntry) { - NewFunc->getArg(0)->setName("cspInit"); - NewFunc->getArg(1)->setName("returnAddr"); - } + if (IsLegacyNonEntry) + NewFunc->getArg(0)->setName("returnAddr"); + for (unsigned Idx = 0; Idx != F->getFunctionType()->params().size(); ++Idx) { - Argument *Arg = NewFunc->getArg(Idx + (IsLegacyNonEntry ? 2 : 0)); + Argument *Arg = NewFunc->getArg(Idx + (IsLegacyNonEntry ? 
1 : 0)); Argument *OldArg = F->getArg(Idx); Arg->setName(OldArg->getName()); OldArg->replaceAllUsesWith(Arg); @@ -264,7 +223,7 @@ static void processContinuations( for (auto *CI : FuncData.second) { B.SetInsertPoint(CI); Value *SuspendRetconArg = nullptr; - if (LowerLgcAwait) { + if (IsLgcCpsMode) { SmallVector Args; SmallVector ArgTys; for (Value *Arg : CI->args()) { @@ -277,6 +236,7 @@ static void processContinuations( auto *ShaderFun = B.CreateIntToPtr(CI->getArgOperand(0), ShaderTy->getPointerTo()); SuspendRetconArg = B.CreateCall(ShaderTy, ShaderFun, Args); + cast(SuspendRetconArg)->copyMetadata(*CI); } else { SuspendRetconArg = CI->getArgOperand(0); } @@ -294,9 +254,9 @@ static void processContinuations( // For lgc.cps, we don't need to save any value, so just not passing any // argument. Value *SavedRetAddr = nullptr; - if (!LowerLgcAwait) { + if (!IsLgcCpsMode) { if (IsLegacyNonEntry) - SavedRetAddr = NewFunc->getArg(1); // Return addr + SavedRetAddr = NewFunc->getArg(0); // Return addr else SavedRetAddr = UndefValue::get(I64); } @@ -309,11 +269,12 @@ static void processContinuations( B.SetInsertPoint(I); SmallVector RetVals; - if (!LowerLgcAwait) { + if (!IsLgcCpsMode) { RetVals.push_back(SavedRetAddr); if (I->getNumOperands() != 0) RetVals.push_back(I->getOperand(0)); } + auto *ContRetCall = B.CreateCall(ContRet, RetVals); // DXILCont passes use annotations on the ret to pass information // on the shader exit to later passes. 
Copy such metadata to the ContRet @@ -343,26 +304,24 @@ LowerAwaitPass::run(llvm::Module &M, }) .build(); Visitor.visit(ToProcess, M); - - bool LowerLgcAwait = !ToProcess.empty(); - if (!LowerLgcAwait) { - for (auto &F : M.functions()) { - if (!F.getName().starts_with("await.")) { - // Force processing annotated functions, even if they don't have await - // calls - if (F.hasMetadata(ContHelper::MDContinuationName)) - ToProcess[&F].size(); - continue; - } - for (auto *U : F.users()) { - if (auto *Inst = dyn_cast(U)) - ToProcess[Inst->getFunction()].push_back(Inst); - } + bool IsLgcCpsMode = !ToProcess.empty() || ContHelper::isLgcCpsModule(M); + + for (auto &F : M.functions()) { + if (!F.getName().starts_with("await.")) { + // Force processing annotated functions, even if they don't have await + // calls + if (F.hasMetadata(ContHelper::MDContinuationName)) + ToProcess[&F].size(); + continue; + } + for (auto *U : F.users()) { + if (auto *Inst = dyn_cast(U)) + ToProcess[Inst->getFunction()].push_back(Inst); } } if (!ToProcess.empty()) { - processContinuations(M, ToProcess, LowerLgcAwait); + processContinuations(M, ToProcess, IsLgcCpsMode); return PreservedAnalyses::none(); } return PreservedAnalyses::all(); diff --git a/shared/continuations/lib/LowerRaytracingPipeline.cpp b/llvmraytracing/lib/LowerRaytracingPipeline.cpp similarity index 92% rename from shared/continuations/lib/LowerRaytracingPipeline.cpp rename to llvmraytracing/lib/LowerRaytracingPipeline.cpp index 498ea09906..8308378230 100644 --- a/shared/continuations/lib/LowerRaytracingPipeline.cpp +++ b/llvmraytracing/lib/LowerRaytracingPipeline.cpp @@ -40,13 +40,15 @@ //===----------------------------------------------------------------------===// #include "compilerutils/CompilerUtils.h" -#include "continuations/Continuations.h" -#include "continuations/ContinuationsDialect.h" -#include "continuations/ContinuationsUtil.h" -#include "continuations/PayloadAccessQualifiers.h" +#include "lgc/LgcCpsDialect.h" 
#include "lgc/LgcRtDialect.h" #include "llvm-dialects/Dialect/OpSet.h" #include "llvm-dialects/Dialect/Visitor.h" +#include "llvmraytracing/Continuations.h" +#include "llvmraytracing/ContinuationsDialect.h" +#include "llvmraytracing/ContinuationsUtil.h" +#include "llvmraytracing/GpurtContext.h" +#include "llvmraytracing/PayloadAccessQualifiers.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" @@ -234,11 +236,17 @@ class ModuleMetadataState final { return MaxPayloadRegisterCount; } - uint32_t getMinPayloadRegisterCount() const { - return MinPayloadRegisterCount; + std::optional tryGetPreservedPayloadRegisterCount() const { + return PreservedPayloadRegisterCount; } - ContStackAddrspace getContStackAddrspace() const { return StackAddrspace; } + void updateMaxUsedPayloadRegisterCount(uint32_t Count) { + MaxUsedPayloadRegisterCount = std::max(Count, MaxUsedPayloadRegisterCount); + } + + uint32_t getMaxUsedPayloadRegisterCount() const { + return MaxUsedPayloadRegisterCount; + } bool isInLgcCpsMode() const { return IsInLgcCpsMode; } @@ -249,12 +257,18 @@ class ModuleMetadataState final { /// MaxPayloadRegisterCount is initialized from metadata. If there is none, /// use this default instead: static constexpr uint32_t DefaultPayloadRegisterCount = 30; - /// Maximum allowed number of registers to be used for the payload. + /// [In]: Maximum allowed number of registers to be used for the payload. + /// It is guaranteed that all modules in a pipeline share this value. uint32_t MaxPayloadRegisterCount = 0; - /// Minimum required number of payload registers. - uint32_t MinPayloadRegisterCount = 0; - /// The address space used for the continuations stack. - /// Either stack or global memory. + /// [In]: If known, the number of payload registers that need to be preserved + /// by functions that don't know the payload type, e.g. Traversal. 
+ std::optional PreservedPayloadRegisterCount = {}; + /// [Out]: The maximum number of payload registers written or read by any + /// shader in the module. This excludes intersection shaders, which + /// just pass through an existing payload. + uint32_t MaxUsedPayloadRegisterCount = 0; + /// [In]: The address space used for the continuations stack. + /// Either stack or global memory. ContStackAddrspace StackAddrspace = ContHelper::DefaultStackAddrspace; /// If the module has lgc.cps.module metadata attached. @@ -294,6 +308,10 @@ class LowerRaytracingPipelinePassImpl final { /// The first store to the alloca'd system data. Instruction *SystemDataFirstStore = nullptr; Type *ReturnTy = nullptr; + + /// Storage for the spilled payload, which is put into the continuation + /// state and stored on the stack. + AllocaInst *SpilledPayload = nullptr; /// Maximum number of I32s required to store the outgoing payload in all /// CallShader or TraceRay (maximum over all TraceRay formats) calls uint32_t MaxOutgoingPayloadI32s = 0; @@ -400,9 +418,10 @@ class LowerRaytracingPipelinePassImpl final { void collectProcessableFunctions(); - Value *insertCpsAwait(Type *ReturnTy, Value *ShaderAddr, Instruction *Call, - ArrayRef Args, ContinuationCallType CallType, - lgc::cps::CpsShaderStage ShaderStage); + Instruction *insertCpsAwait(Type *ReturnTy, Value *ShaderAddr, + Instruction *Call, ArrayRef Args, + ContinuationCallType CallType, + lgc::cps::CpsShaderStage ShaderStage); MapVector ToProcess; Module *Mod; @@ -454,11 +473,16 @@ ModuleMetadataState::ModuleMetadataState(Module &Module) : Mod{Module} { // Check that if there is a required minimum number of payload registers, // it is compatible - auto MinRegisterCountFromMD = - ContHelper::tryGetMinPayloadRegisterCount(Module); - MinPayloadRegisterCount = - MinRegisterCountFromMD.value_or(MaxPayloadRegisterCount); - assert(MinPayloadRegisterCount <= MaxPayloadRegisterCount); + PreservedPayloadRegisterCount = + 
ContHelper::tryGetPreservedPayloadRegisterCount(Module); + assert(PreservedPayloadRegisterCount.value_or(MaxPayloadRegisterCount) <= + MaxPayloadRegisterCount); + + MaxUsedPayloadRegisterCount = + ContHelper::tryGetMaxUsedPayloadRegisterCount(Module).value_or(0); + if (PreservedPayloadRegisterCount.has_value()) + MaxUsedPayloadRegisterCount = std::max( + MaxUsedPayloadRegisterCount, PreservedPayloadRegisterCount.value()); // Import StackAddrspace from metadata if set, otherwise from default auto StackAddrspaceMD = ContHelper::tryGetStackAddrspace(Module); @@ -471,6 +495,7 @@ ModuleMetadataState::ModuleMetadataState(Module &Module) : Mod{Module} { /// stack address space that was derived by metadata as global state. void ModuleMetadataState::updateModuleMetadata() const { ContHelper::setMaxPayloadRegisterCount(Mod, MaxPayloadRegisterCount); + ContHelper::setMaxUsedPayloadRegisterCount(Mod, MaxUsedPayloadRegisterCount); ContHelper::setStackAddrspace(Mod, StackAddrspace); } @@ -497,7 +522,7 @@ convertShaderKindToCpsShaderStage(DXILShaderKind Kind) { } // Create a lgc.cps.await operation for a given shader address. -Value *LowerRaytracingPipelinePassImpl::insertCpsAwait( +Instruction *LowerRaytracingPipelinePassImpl::insertCpsAwait( Type *ReturnTy, Value *ShaderAddr, Instruction *Call, ArrayRef Args, ContinuationCallType CallType, CpsShaderStage ShaderStage) { @@ -729,6 +754,7 @@ void LowerRaytracingPipelinePassImpl::replaceReportHitCall(FunctionData &Data, cast(Data.ReturnTy)); Value *RetSystemData = Builder.CreateLoad(Data.ReturnTy, SystemData); + Instruction *Ret = nullptr; if (MetadataState.isInLgcCpsMode()) { uint32_t CpsRetLevel = getPotentialCpsReturnLevels( convertShaderKindToCpsShaderStage(Data.Kind)); @@ -738,19 +764,21 @@ void LowerRaytracingPipelinePassImpl::replaceReportHitCall(FunctionData &Data, // Argument list: %rcr, %shader-index, %system-data. 
SmallVector TailArgs = {PoisonValue::get(I32), PoisonValue::get(I32), RetSystemData}; - Builder.create( + Ret = Builder.create( F->getArg(CpsArgIdxReturnAddr), CpsRetLevel, PoisonValue::get(StructType::get(Builder.getContext())), TailArgs); Builder.CreateUnreachable(); } else { - auto *Ret = Builder.CreateRet(RetSystemData); - - // Assume worst-case payload size for Intersection. See the note on the - // incoming payload size. - ContHelper::setOutgoingRegisterCount( - Ret, MetadataState.getMaxPayloadRegisterCount()); + Ret = Builder.CreateRet(RetSystemData); } + // Assume worst-case payload size for Intersection. See the note on the + // incoming payload size. + ContHelper::setOutgoingRegisterCount( + Ret, MetadataState.getMaxPayloadRegisterCount()); + // Intentionally do NOT update MaxUsedPayloadRegisterCount: Intersection + // assumes the worst-case size, but this doesn't mean it actually occurs. + // Remove trailing unreachable Then->eraseFromParent(); } @@ -816,18 +844,9 @@ void LowerRaytracingPipelinePassImpl::replaceContinuationCall( if (OutgoingSerializationLayout) { // Set up the payload spill pointer if necessary if (OutgoingSerializationLayout->PayloadMemPointerNode) { - // If we have a mem pointer, then we need to allocate stack storage - // The reverse does not hold, as a different payload type in the same - // shader could require the allocation. assert(Data.PayloadSpillSize != 0 && "Inconsistent payload stack size"); - // Peek into the stack. 
This eventually will become lgc.cps.peek - auto *CspType = getContinuationStackOffsetType(Builder.getContext()); - auto *CspPtr = Builder.CreateCall(getContinuationStackOffset(*Mod)); - auto *Csp = Builder.CreateLoad(CspType, CspPtr); - Value *LocalPayloadMem = - Builder.CreateAdd(Csp, Builder.getInt32(-Data.PayloadSpillSize)); - + Value *LocalPayloadMem = Builder.CreatePtrToInt(Data.SpilledPayload, I32); #ifndef NDEBUG // Check that payload pointer exists and is in first position auto It = OutgoingSerializationLayout->NodeStorageInfos.find( @@ -842,6 +861,7 @@ void LowerRaytracingPipelinePassImpl::replaceContinuationCall( // Copy to payload storage Value *CastPayload = Builder.CreateBitCast( Payload, I32->getPointerTo(Payload->getAddressSpace())); + Builder.CreateStore(LocalPayloadMem, CastPayload); // Barrier to ensure that accesses to the potentially in-memory parts of // the payload are not re-ordered before this store. More precisely, later @@ -896,10 +916,12 @@ void LowerRaytracingPipelinePassImpl::replaceContinuationCall( Args.push_back(HitAttrs); } - Value *NewCall = nullptr; + Instruction *Annotatable = nullptr; + Instruction *NewCall = nullptr; if (MetadataState.isInLgcCpsMode()) { NewCall = insertCpsAwait(Call->getType(), ShaderAddr, Call, Args, CallType, convertShaderKindToCpsShaderStage(Data.Kind)); + Annotatable = NewCall; } else { auto *ShaderTy = FunctionType::get(TokenTy, ArgTys, false); auto *ShaderFun = @@ -909,14 +931,7 @@ void LowerRaytracingPipelinePassImpl::replaceContinuationCall( auto *Await = getContinuationAwait(*Mod, TokenTy, cast(SystemDataTy)); NewCall = Builder.CreateCall(Await, {Token}); - - // Annotate call with the number of registers used for payload - ContHelper::setOutgoingRegisterCount( - Token, std::min(OutgoingSerializationLayout - ? 
OutgoingSerializationLayout->NumStorageI32s - : MetadataState.getMaxPayloadRegisterCount(), - MetadataState.getMaxPayloadRegisterCount())); - ContHelper::setReturnedRegisterCount(Token, ReturnedRegisterCount.value()); + Annotatable = Token; // For WaitAwait, add metadata indicating that we wait. After coroutine // passes, we then generate a waitContinue on the awaited function. @@ -924,6 +939,21 @@ void LowerRaytracingPipelinePassImpl::replaceContinuationCall( ContHelper::setIsWaitAwaitCall(*Token); } + ContHelper::setReturnedRegisterCount(Annotatable, + ReturnedRegisterCount.value()); + + auto OutgoingRegisterCount = std::min( + OutgoingSerializationLayout ? OutgoingSerializationLayout->NumStorageI32s + : MetadataState.getMaxPayloadRegisterCount(), + MetadataState.getMaxPayloadRegisterCount()); + // Annotate call with the number of registers used for payload + ContHelper::setOutgoingRegisterCount(Annotatable, OutgoingRegisterCount); + if (OutgoingSerializationLayout) { + MetadataState.updateMaxUsedPayloadRegisterCount(OutgoingRegisterCount); + MetadataState.updateMaxUsedPayloadRegisterCount( + ReturnedRegisterCount.value()); + } + if (CallType != ContinuationCallType::AnyHit) { // Copy global payload back to local payload // Overwrite the local payload with poison first, to make sure it is not @@ -1229,7 +1259,6 @@ void LowerRaytracingPipelinePassImpl::copyHitAttributes( uint64_t InlineHitAttrsBytes = getInlineHitAttrsBytes(*GpurtLibrary); uint64_t InlineRegSize = InlineHitAttrsBytes / RegisterBytes; auto *RegTy = Builder.getIntNTy(RegisterBytes * 8); - auto *RegTyPtr = RegTy->getPointerTo(); // Hit attribute storage is split between inline hit attributes in system // data, and possibly some payload registers. 
In order to access inline hit @@ -1242,6 +1271,7 @@ void LowerRaytracingPipelinePassImpl::copyHitAttributes( Builder.SetInsertPoint( Builder.GetInsertBlock()->getParent()->getEntryBlock().getFirstNonPHI()); auto *InlineHitAttrsAlloc = Builder.CreateAlloca(InlineHitAttrsTy); + auto *RegTyPtr = RegTy->getPointerTo(InlineHitAttrsAlloc->getAddressSpace()); Builder.restoreIP(InsertPoint); auto *InlineHitAttrs = Builder.CreateBitCast(InlineHitAttrsAlloc, RegTyPtr); @@ -1353,14 +1383,11 @@ void LowerRaytracingPipelinePassImpl::copyHitAttributes( void LowerRaytracingPipelinePassImpl::createPayloadGlobal() { I32 = Type::getInt32Ty(*Context); - // Find maximum payload storage size: - // If there is a set minimum payload register count, rely on that value being - // large enough to ensure shaders in this module are compatible with other - // shaders they are going to be used with. Otherwise, use the maximum allowed - // number of payload registers (this is by default assigned to - // MinPayloadRegisterCount, if MinRegisterCount is not set on the module - // metadata.) Note: this influences the payload size in Traversal. - uint32_t MaxPayloadI32s = MetadataState.getMinPayloadRegisterCount(); + // Determine an upper bound on the maximum required size for the @PAYLOAD + // global. Its size doesn't have an important meaning, but it needs to be + // large enough for generated code in this pass. Later, the RegisterBufferPass + // will shrink the used global if necessary. 
+ uint32_t MaxPayloadI32s = MetadataState.getMaxPayloadRegisterCount(); for (const auto &[_, FuncData] : ToProcess) { MaxPayloadI32s = std::max(MaxPayloadI32s, FuncData.MaxOutgoingPayloadI32s); if (FuncData.IncomingPayloadSerializationInfo) @@ -1381,8 +1408,7 @@ void LowerRaytracingPipelinePassImpl::createPayloadGlobal() { // into i32s RegisterBufferMD RMD; RMD.RegisterCount = MetadataState.getMaxPayloadRegisterCount(); - RMD.Addrspace = - static_cast(MetadataState.getContStackAddrspace()); + RMD.Addrspace = lgc::cps::stackAddrSpace; auto *MD = createRegisterBufferMetadata(*Context, RMD); Payload->addMetadata("registerbuffer", *MD); @@ -1391,9 +1417,13 @@ void LowerRaytracingPipelinePassImpl::createPayloadGlobal() { } void LowerRaytracingPipelinePassImpl::setGpurtEntryRegisterCountMetadata() { - const uint32_t MaxRegisterCount = std::min( - static_cast(Payload->getValueType()->getArrayNumElements()), - MetadataState.getMaxPayloadRegisterCount()); + // Even if PreservedPayloadRegisterCount is set, there may be + // additional shaders in the current module whose usage is recorded + // in MaxUsedPayloadRegisterCount, so take the max with it. 
+ uint32_t MaxRegisterCount = + std::max(MetadataState.tryGetPreservedPayloadRegisterCount().value_or( + MetadataState.getMaxPayloadRegisterCount()), + MetadataState.getMaxUsedPayloadRegisterCount()); for (const auto &Name : {"continuation.continue", "continuation.waitContinue"}) { @@ -1429,11 +1459,13 @@ void LowerRaytracingPipelinePassImpl::setGpurtEntryRegisterCountMetadata() { assert(!ContHelper::tryGetOutgoingRegisterCount(CI).has_value() && "Unexpected register count metadata"); ContHelper::setOutgoingRegisterCount(CI, OutRegisterCount); + MetadataState.updateMaxUsedPayloadRegisterCount(OutRegisterCount); assert(ContHelper::tryGetIncomingRegisterCount(Func).value_or( InRegisterCount) == InRegisterCount && "Unexpected incoming register count on Traversal"); ContHelper::setIncomingRegisterCount(Func, InRegisterCount); + MetadataState.updateMaxUsedPayloadRegisterCount(InRegisterCount); } } } @@ -1452,13 +1484,16 @@ void LowerRaytracingPipelinePassImpl::processFunctionEntry( // See also the system data documentation at the top of Continuations.h. 
Data.SystemData = Builder.CreateAlloca(Data.SystemDataTy); Data.SystemData->setName("system.data.alloca"); - // Initialize system data by copying the argument - Data.SystemDataFirstStore = - Builder.CreateStore(SystemDataArgument, Data.SystemData); // Allocate payload spilling space if (Data.PayloadSpillSize > 0) - moveContinuationStackOffset(Builder, Data.PayloadSpillSize); + Data.SpilledPayload = Builder.CreateAlloca( + ArrayType::get(I32, divideCeil(Data.PayloadSpillSize, RegisterBytes)), + nullptr, "payload.spill.alloca"); + + // Initialize system data by copying the argument + Data.SystemDataFirstStore = + Builder.CreateStore(SystemDataArgument, Data.SystemData); } void LowerRaytracingPipelinePassImpl::processFunctionEnd( @@ -1537,9 +1572,6 @@ void LowerRaytracingPipelinePassImpl::processFunctionEnd( } } - if (Data.PayloadSpillSize > 0) - moveContinuationStackOffset(Builder, -Data.PayloadSpillSize); - Value *RetValue = nullptr; if (!Data.ReturnTy->isVoidTy()) { auto *SystemData = @@ -1548,6 +1580,7 @@ void LowerRaytracingPipelinePassImpl::processFunctionEnd( RetValue = Builder.CreateLoad(Data.ReturnTy, SystemData); } + Instruction *Ret = nullptr; if (MetadataState.isInLgcCpsMode()) { uint32_t CpsRetLevel = getPotentialCpsReturnLevels( convertShaderKindToCpsShaderStage(Data.Kind)); @@ -1570,10 +1603,11 @@ void LowerRaytracingPipelinePassImpl::processFunctionEnd( Builder.CreateUnreachable(); } } else { - Instruction *Ret = - RetValue ? Builder.CreateRet(RetValue) : Builder.CreateRetVoid(); + Ret = RetValue ? Builder.CreateRet(RetValue) : Builder.CreateRetVoid(); + } - // Annotate ret with number of outgoing payload registers. + if (Ret) { + // Annotate the terminator with number of outgoing payload registers. // This annotation will be passed along the following transformations, // ending up at the final continuation call. 
unsigned OutgoingRegisterCount = @@ -1582,6 +1616,8 @@ void LowerRaytracingPipelinePassImpl::processFunctionEnd( MetadataState.getMaxPayloadRegisterCount()) : MetadataState.getMaxPayloadRegisterCount(); ContHelper::setOutgoingRegisterCount(Ret, OutgoingRegisterCount); + if (EData.OutgoingSerializationLayout) + MetadataState.updateMaxUsedPayloadRegisterCount(OutgoingRegisterCount); } EData.Terminator->eraseFromParent(); @@ -1667,6 +1703,10 @@ void LowerRaytracingPipelinePassImpl::processFunction(Function *F, Function *NewFunc = CompilerUtils::cloneFunctionHeader( *F, NewFuncTy, ArrayRef{}); NewFunc->takeName(F); + // FIXME: Remove !types metadata to work around an LLVM bug. If struct types + // are referenced only from metadata, LLVM omits the type declaration when + // printing IR and fails to read it back in because of an unknown type. + NewFunc->setMetadata("types", nullptr); llvm::moveFunctionBody(*F, *NewFunc); @@ -1744,9 +1784,11 @@ void LowerRaytracingPipelinePassImpl::processFunction(Function *F, // TODO Read payload argument for lgc continuations } else { // Annotate function with the number of registers for incoming payload - ContHelper::setIncomingRegisterCount( - NewFunc, std::min(IncomingSerializationLayout.NumStorageI32s, - MetadataState.getMaxPayloadRegisterCount())); + auto IncomingRegisterCount = + std::min(IncomingSerializationLayout.NumStorageI32s, + MetadataState.getMaxPayloadRegisterCount()); + ContHelper::setIncomingRegisterCount(NewFunc, IncomingRegisterCount); + MetadataState.updateMaxUsedPayloadRegisterCount(IncomingRegisterCount); // Copy global payload into local payload at start of shader if (IncomingSerializationLayout.NumStorageI32s) { @@ -1821,6 +1863,7 @@ void LowerRaytracingPipelinePassImpl::processFunction(Function *F, // payload size of shaders in pipelines this shader is used in. 
ContHelper::setIncomingRegisterCount( NewFunc, MetadataState.getMaxPayloadRegisterCount()); + // Intentionally do NOT update MaxUsedPayloadRegisterCount } } @@ -2039,7 +2082,7 @@ void LowerRaytracingPipelinePassImpl::handleUnrematerializableCandidates() { // Collect GPURT functions and do precondition checks on the fly. void LowerRaytracingPipelinePassImpl::collectGpuRtFunctions() { - IsEndSearch = GpurtLibrary->getFunction("_cont_IsEndSearch"); + IsEndSearch = GpurtLibrary->getFunction(ContDriverFunc::IsEndSearchName); if (IsEndSearch) assert(IsEndSearch->getReturnType() == Type::getInt1Ty(*Context) && IsEndSearch->arg_size() == 1 @@ -2047,7 +2090,7 @@ void LowerRaytracingPipelinePassImpl::collectGpuRtFunctions() { && IsEndSearch->getFunctionType()->getParamType(0)->isPointerTy()); GetTriangleHitAttributes = - GpurtLibrary->getFunction("_cont_GetTriangleHitAttributes"); + GpurtLibrary->getFunction(ContDriverFunc::GetTriangleHitAttributesName); if (GetTriangleHitAttributes) assert(GetTriangleHitAttributes->getReturnType() ->isStructTy() // BuiltinTriangleIntersectionAttributes @@ -2058,7 +2101,7 @@ void LowerRaytracingPipelinePassImpl::collectGpuRtFunctions() { ->isPointerTy()); SetTriangleHitAttributes = - GpurtLibrary->getFunction("_cont_SetTriangleHitAttributes"); + GpurtLibrary->getFunction(ContDriverFunc::SetTriangleHitAttributesName); if (SetTriangleHitAttributes) assert(SetTriangleHitAttributes->getReturnType()->isVoidTy() && SetTriangleHitAttributes->arg_size() == 2 @@ -2067,11 +2110,15 @@ void LowerRaytracingPipelinePassImpl::collectGpuRtFunctions() { ->getParamType(0) ->isPointerTy() // BuiltinTriangleIntersectionAttributes - && SetTriangleHitAttributes->getFunctionType() - ->getParamType(1) - ->isStructTy()); - - GetLocalRootIndex = GpurtLibrary->getFunction("_cont_GetLocalRootIndex"); + && (SetTriangleHitAttributes->getFunctionType() + ->getParamType(1) + ->isStructTy() || + SetTriangleHitAttributes->getFunctionType() + ->getParamType(1) + 
->isPointerTy())); + + GetLocalRootIndex = + GpurtLibrary->getFunction(ContDriverFunc::GetLocalRootIndexName); if (GetLocalRootIndex) assert( GetLocalRootIndex->getReturnType() == @@ -2083,19 +2130,19 @@ void LowerRaytracingPipelinePassImpl::collectGpuRtFunctions() { SetLocalRootIndex = getSetLocalRootIndex(*Mod); - SetupRayGen = GpurtLibrary->getFunction("_cont_SetupRayGen"); + SetupRayGen = GpurtLibrary->getFunction(ContDriverFunc::SetupRayGenName); if (SetupRayGen) assert(SetupRayGen->getReturnType()->isStructTy() && SetupRayGen->arg_empty()); - TraceRay = GpurtLibrary->getFunction("_cont_TraceRay"); + TraceRay = GpurtLibrary->getFunction(ContDriverFunc::TraceRayName); if (TraceRay) assert(TraceRay->getReturnType()->isVoidTy() && TraceRay->arg_size() == 15 // Dispatch data && TraceRay->getFunctionType()->getParamType(0)->isPointerTy()); - CallShader = GpurtLibrary->getFunction("_cont_CallShader"); + CallShader = GpurtLibrary->getFunction(ContDriverFunc::CallShaderName); if (CallShader) assert(CallShader->getReturnType()->isVoidTy() && CallShader->arg_size() == 2 @@ -2105,25 +2152,26 @@ void LowerRaytracingPipelinePassImpl::collectGpuRtFunctions() { && CallShader->getFunctionType()->getParamType(1) == Type::getInt32Ty(*Context)); - ReportHit = GpurtLibrary->getFunction("_cont_ReportHit"); + ReportHit = GpurtLibrary->getFunction(ContDriverFunc::ReportHitName); if (ReportHit) assert(ReportHit->getReturnType()->isIntegerTy(1) && ReportHit->arg_size() == 3 // Traversal data && ReportHit->getFunctionType()->getParamType(0)->isPointerTy()); - AcceptHit = GpurtLibrary->getFunction("_cont_AcceptHit"); + AcceptHit = GpurtLibrary->getFunction(ContDriverFunc::AcceptHitName); if (AcceptHit) assert(AcceptHit->getReturnType()->isVoidTy() && AcceptHit->arg_size() == 1 // Traversal data && AcceptHit->getFunctionType()->getParamType(0)->isPointerTy()); - GetSbtAddress = GpurtLibrary->getFunction("_cont_GetSbtAddress"); + GetSbtAddress = 
GpurtLibrary->getFunction(ContDriverFunc::GetSbtAddressName); if (GetSbtAddress) assert(GetSbtAddress->getReturnType()->isIntegerTy(64) && GetSbtAddress->arg_empty()); - GetSbtStride = GpurtLibrary->getFunction("_cont_GetSbtStride"); + + GetSbtStride = GpurtLibrary->getFunction(ContDriverFunc::GetSbtStrideName); if (GetSbtStride) assert(GetSbtStride->getReturnType()->isIntegerTy(32) && GetSbtStride->arg_empty()); @@ -2137,8 +2185,6 @@ LowerRaytracingPipelinePassImpl::LowerRaytracingPipelinePassImpl( MetadataState.getMaxPayloadRegisterCount()} {} bool LowerRaytracingPipelinePassImpl::run() { - MetadataState.updateModuleMetadata(); - collectGpuRtFunctions(); collectProcessableFunctions(); @@ -2216,7 +2262,7 @@ bool LowerRaytracingPipelinePassImpl::run() { if (ReportHit) TraversalDataTy = getFuncArgPtrElementType(ReportHit, 0); HitMissDataTy = nullptr; - if (auto *HitKind = GpurtLibrary->getFunction("_cont_HitKind")) { + if (auto *HitKind = GpurtLibrary->getFunction(ContDriverFunc::HitKindName)) { HitMissDataTy = getFuncArgPtrElementType(HitKind, 0); LLVM_DEBUG(dbgs() << "HitMiss system data from _cont_HitKind: "; HitMissDataTy->dump()); @@ -2239,9 +2285,9 @@ bool LowerRaytracingPipelinePassImpl::run() { // For tests, remove intrinsic implementations from the module for (auto &F : make_early_inc_range(*Mod)) { auto Name = F.getName(); - if (Name.starts_with("_cont_TraceRay") || - Name.starts_with("_cont_CallShader") || - Name.starts_with("_cont_ReportHit")) { + if (Name.starts_with(ContDriverFunc::TraceRayName) || + Name.starts_with(ContDriverFunc::CallShaderName) || + Name.starts_with(ContDriverFunc::ReportHitName)) { F.eraseFromParent(); } } @@ -2253,6 +2299,8 @@ bool LowerRaytracingPipelinePassImpl::run() { llvm::removeUnusedFunctionDecls(Mod); + MetadataState.updateModuleMetadata(); + return true; } @@ -2284,7 +2332,9 @@ LowerRaytracingPipelinePass::run(llvm::Module &M, LLVM_DEBUG(dbgs() << "Run the pass lower-raytracing-pipeline\n"); AnalysisManager.getResult(M); 
- LowerRaytracingPipelinePassImpl Impl(M, GpurtLibrary ? *GpurtLibrary : M); + auto &GpurtContext = lgc::GpurtContext::get(M.getContext()); + LowerRaytracingPipelinePassImpl Impl( + M, GpurtContext.theModule ? *GpurtContext.theModule : M); bool Changed = Impl.run(); return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); diff --git a/shared/continuations/lib/PassRegistry.inc b/llvmraytracing/lib/PassRegistry.inc similarity index 97% rename from shared/continuations/lib/PassRegistry.inc rename to llvmraytracing/lib/PassRegistry.inc index 3c010e88a5..8d3220bcf9 100644 --- a/shared/continuations/lib/PassRegistry.inc +++ b/llvmraytracing/lib/PassRegistry.inc @@ -60,7 +60,6 @@ CONT_MODULE_PASS("dxil-cont-pre-hook", DXILContPreHookPass()) CONT_MODULE_PASS("lower-await", LowerAwaitPass()) CONT_MODULE_PASS("register-buffer", RegisterBufferPass()) CONT_MODULE_PASS("remove-types-metadata", RemoveTypesMetadataPass()) -CONT_MODULE_PASS("save-continuation-state", SaveContinuationStatePass()) CONT_CGSCC_PASS("dxil-coro-split", DXILCoroSplitPass()) CONT_CGSCC_PASS("lgc-coro-split", LgcCoroSplitPass()) diff --git a/shared/continuations/lib/PayloadAccessQualifiers.cpp b/llvmraytracing/lib/PayloadAccessQualifiers.cpp similarity index 99% rename from shared/continuations/lib/PayloadAccessQualifiers.cpp rename to llvmraytracing/lib/PayloadAccessQualifiers.cpp index 84c3ac40b7..53117482e6 100644 --- a/shared/continuations/lib/PayloadAccessQualifiers.cpp +++ b/llvmraytracing/lib/PayloadAccessQualifiers.cpp @@ -34,8 +34,8 @@ // //===----------------------------------------------------------------------===// -#include "continuations/PayloadAccessQualifiers.h" -#include "continuations/Continuations.h" +#include "llvmraytracing/PayloadAccessQualifiers.h" +#include "llvmraytracing/Continuations.h" #include "llvm/ADT/EnumeratedArray.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" diff --git a/shared/continuations/lib/RegisterBuffer.cpp 
b/llvmraytracing/lib/RegisterBuffer.cpp similarity index 99% rename from shared/continuations/lib/RegisterBuffer.cpp rename to llvmraytracing/lib/RegisterBuffer.cpp index d59457b04a..23dfaaa867 100644 --- a/shared/continuations/lib/RegisterBuffer.cpp +++ b/llvmraytracing/lib/RegisterBuffer.cpp @@ -45,7 +45,7 @@ // // After the buffer is lowered, the memory pointer is accessed // through the intrinsics -// i32 addrspace(21)* @registerbuffer.getpointer.a20i32([20 x i32] +// i32 addrspace(32)* @registerbuffer.getpointer.a20i32([20 x i32] // addrspace(20)*) // A later pass needs to find these and change them to the actual memory // pointer. @@ -57,7 +57,7 @@ // //===----------------------------------------------------------------------===// -#include "continuations/Continuations.h" +#include "llvmraytracing/Continuations.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" diff --git a/shared/continuations/lib/RemoveTypesMetadata.cpp b/llvmraytracing/lib/RemoveTypesMetadata.cpp similarity index 98% rename from shared/continuations/lib/RemoveTypesMetadata.cpp rename to llvmraytracing/lib/RemoveTypesMetadata.cpp index b0c569ac1f..fcb3d47881 100644 --- a/shared/continuations/lib/RemoveTypesMetadata.cpp +++ b/llvmraytracing/lib/RemoveTypesMetadata.cpp @@ -29,7 +29,7 @@ // //===----------------------------------------------------------------------===// -#include "continuations/Continuations.h" +#include "llvmraytracing/Continuations.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/IRBuilder.h" diff --git a/shared/continuations/lib/TypesMetadata.cpp b/llvmraytracing/lib/TypesMetadata.cpp similarity index 99% rename from shared/continuations/lib/TypesMetadata.cpp rename to llvmraytracing/lib/TypesMetadata.cpp index b17d2c52a4..c06f116290 100644 --- a/shared/continuations/lib/TypesMetadata.cpp +++ b/llvmraytracing/lib/TypesMetadata.cpp @@ -29,7 +29,7 @@ // 
//===----------------------------------------------------------------------===// -#include "continuations/Continuations.h" +#include "llvmraytracing/Continuations.h" namespace llvm { diff --git a/llvmraytracing/plugin/CMakeLists.txt b/llvmraytracing/plugin/CMakeLists.txt new file mode 100644 index 0000000000..75b0d50abd --- /dev/null +++ b/llvmraytracing/plugin/CMakeLists.txt @@ -0,0 +1,11 @@ +set(LLVM_RAYTRACINGPLUGIN_LINK_INTO_TOOLS ON CACHE BOOL "Link raytracing plugin into tools" FORCE) + +add_llvm_pass_plugin(RaytracingPlugin + Plugin.cpp + + LINK_COMPONENTS + Support +) + +target_link_libraries(RaytracingPlugin PRIVATE LLVMRaytracing) +set_compiler_options(RaytracingPlugin) diff --git a/shared/continuations/plugin/Plugin.cpp b/llvmraytracing/plugin/Plugin.cpp similarity index 82% rename from shared/continuations/plugin/Plugin.cpp rename to llvmraytracing/plugin/Plugin.cpp index 757f573966..12ad6a3b93 100644 --- a/shared/continuations/plugin/Plugin.cpp +++ b/llvmraytracing/plugin/Plugin.cpp @@ -23,27 +23,27 @@ * **********************************************************************************************************************/ -//===- Plugin.cpp - LLVM plugin for continuation passes -------------------===// +//===- Plugin.cpp - LLVM plugin for raytracing passes ---------------------===// // -// Register continuation passes, so they can be used from opt. +// Register raytracing passes, so they can be used from opt. 
// //===----------------------------------------------------------------------===// -#include "continuations/Continuations.h" +#include "llvmraytracing/Continuations.h" #include "llvm/Passes/PassBuilder.h" #include "llvm/Passes/PassPlugin.h" // New PM registration -llvm::PassPluginLibraryInfo getContinuationsPluginPluginInfo() { - return {LLVM_PLUGIN_API_VERSION, "Continuations", LLVM_VERSION_STRING, +llvm::PassPluginLibraryInfo getRaytracingPluginPluginInfo() { + return {LLVM_PLUGIN_API_VERSION, "Raytracing", LLVM_VERSION_STRING, [](llvm::PassBuilder &PB) { llvm::ContHelper::RegisterPasses(PB, true); }}; } -#ifndef LLVM_CONTINUATIONSPLUGIN_LINK_INTO_TOOLS +#ifndef LLVM_RAYTRACINGPLUGIN_LINK_INTO_TOOLS extern "C" LLVM_ATTRIBUTE_WEAK ::llvm::PassPluginLibraryInfo llvmGetPassPluginInfo() { - return getContinuationsPluginPluginInfo(); + return getRaytracingPluginPluginInfo(); } #endif diff --git a/llvmraytracing/test/CMakeLists.txt b/llvmraytracing/test/CMakeLists.txt new file mode 100644 index 0000000000..4d9bf9ac8d --- /dev/null +++ b/llvmraytracing/test/CMakeLists.txt @@ -0,0 +1,27 @@ +set(LLVMRAYTRACING_TEST_DEPENDS opt FileCheck count not) +add_custom_target(llvmraytracing-test-depends DEPENDS ${LLVMRAYTRACING_TEST_DEPENDS}) +set_target_properties(llvmraytracing-test-depends PROPERTIES FOLDER "Tests") + +# required by lit.site.cfg.py.in +set(LLVMRAYTRACING_TEST_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) + +# required by configure_lit_site_cfg +set(LLVM_LIT_OUTPUT_DIR ${LLVM_TOOLS_BINARY_DIR}) +configure_lit_site_cfg( + ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in + ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg.py + MAIN_CONFIG + ${CMAKE_CURRENT_SOURCE_DIR}/lit.cfg.py +) + +add_lit_testsuite(check-llvmraytracing "Running the LLVM Raytracing regression tests" + ${CMAKE_CURRENT_BINARY_DIR} + ${exclude_from_check_all} + DEPENDS ${LLVMRAYTRACING_TEST_DEPENDS} +) +set_target_properties(check-llvmraytracing PROPERTIES FOLDER "Tests") + +add_lit_testsuites(LLVMRAYTRACING 
${CMAKE_CURRENT_SOURCE_DIR} + ${exclude_from_check_all} + DEPENDS ${LLVMRAYTRACING_TEST_DEPENDS} +) diff --git a/llvmraytracing/test/dx/cleanup-continuations-malloc.ll b/llvmraytracing/test/dx/cleanup-continuations-malloc.ll new file mode 100644 index 0000000000..06ac764f00 --- /dev/null +++ b/llvmraytracing/test/dx/cleanup-continuations-malloc.ll @@ -0,0 +1,53 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt --verify-each -passes='lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint' -S %s 2> %t.stderr | FileCheck %s +; RUN: count 0 < %t.stderr + +target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" + +%continuation.token = type { } + +declare void @await.void(%continuation.token*) +declare %continuation.token* @async_fun() + +define <4 x i32> @simple_await(<4 x i32> %arg) !continuation.registercount !1 { +; CHECK-LABEL: define void @simple_await( +; CHECK-SAME: i64 [[RETURNADDR:%.*]], <4 x i32> [[ARG:%.*]]) !continuation.registercount [[META1:![0-9]+]] !continuation [[META2:![0-9]+]] !continuation.stacksize [[META3:![0-9]+]] !continuation.state [[META3]] { +; CHECK-NEXT: AllocaSpillBB: +; CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) +; CHECK-NEXT: [[ARG_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME:%.*]], ptr addrspace(32) [[TMP0]], i32 0, i32 0 +; CHECK-NEXT: store <4 x i32> [[ARG]], ptr addrspace(32) [[ARG_SPILL_ADDR]], align 4 +; CHECK-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME]], ptr addrspace(32) [[TMP0]], i32 0, i32 1 +; CHECK-NEXT: store i64 [[RETURNADDR]], ptr addrspace(32) [[RETURNADDR_SPILL_ADDR]], align 4 +; CHECK-NEXT: [[CONT_STATE_STACK_ALLOC:%.*]] = call ptr addrspace(32) 
@lgc.cps.alloc(i32 24) +; CHECK-NEXT: call void (i64, ...) @continuation.continue(i64 ptrtoint (ptr @async_fun to i64), i64 ptrtoint (ptr @simple_await.resume.0 to i64)), !continuation.registercount [[META1]], !continuation.returnedRegistercount !1 +; CHECK-NEXT: unreachable +; + %tok = call %continuation.token* @async_fun(), !continuation.registercount !1, !continuation.returnedRegistercount !1 + call void @await.void(%continuation.token* %tok) + ret <4 x i32> %arg, !continuation.registercount !1 +} + +define void @simple_await_entry(<4 x i32> %arg, <4 x i32> addrspace(1)* %mem) !continuation.entry !0 !continuation.registercount !1 { +; CHECK-LABEL: define void @simple_await_entry( +; CHECK-SAME: <4 x i32> [[ARG:%.*]], ptr addrspace(1) [[MEM:%.*]]) !continuation.registercount [[META1]] !continuation.entry [[META4:![0-9]+]] !continuation [[META5:![0-9]+]] !continuation.stacksize [[META3]] !continuation.state [[META3]] { +; CHECK-NEXT: AllocaSpillBB: +; CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) +; CHECK-NEXT: [[MEM_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_ENTRY_FRAME:%.*]], ptr addrspace(32) [[TMP0]], i32 0, i32 1 +; CHECK-NEXT: store ptr addrspace(1) [[MEM]], ptr addrspace(32) [[MEM_SPILL_ADDR]], align 4 +; CHECK-NEXT: [[ARG_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_ENTRY_FRAME]], ptr addrspace(32) [[TMP0]], i32 0, i32 0 +; CHECK-NEXT: store <4 x i32> [[ARG]], ptr addrspace(32) [[ARG_SPILL_ADDR]], align 4 +; CHECK-NEXT: [[CONT_STATE_STACK_ALLOC:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 24) +; CHECK-NEXT: call void (i64, ...) 
@continuation.continue(i64 ptrtoint (ptr @async_fun to i64), i64 ptrtoint (ptr @simple_await_entry.resume.0 to i64)), !continuation.registercount [[META1]], !continuation.returnedRegistercount !1 +; CHECK-NEXT: unreachable +; + %tok = call %continuation.token* @async_fun(), !continuation.registercount !1, !continuation.returnedRegistercount !1 + call void @await.void(%continuation.token* %tok) + store <4 x i32> %arg, <4 x i32> addrspace(1)* %mem + ret void, !continuation.registercount !1 +} + +!continuation.stackAddrspace = !{!2} + +!0 = !{} +!1 = !{i32 0} +!2 = !{i32 21} diff --git a/llvmraytracing/test/dx/cleanup-continuations.ll b/llvmraytracing/test/dx/cleanup-continuations.ll new file mode 100644 index 0000000000..3a3a9891ce --- /dev/null +++ b/llvmraytracing/test/dx/cleanup-continuations.ll @@ -0,0 +1,160 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals --version 3 +; RUN: opt --verify-each -passes='legacy-cleanup-continuations,lint' -S %s 2> %t.stderr | FileCheck %s +; RUN: count 0 < %t.stderr + +target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" + +%continuation.token = type { } +%await_with_ret_value.Frame = type { i64 } +%simple_await.Frame = type { i64 } +%simple_await_entry.Frame = type { } + +declare %continuation.token* @async_fun() +declare i32 @continuations.getReturnValue__i32() #0 +declare void @continuation.return(i64, ...) 
+ +define { i8*, %continuation.token* } @simple_await(i8* %0) !continuation !0 !continuation.registercount !4 { +; CHECK-LABEL: define void @simple_await( +; CHECK-SAME: ) !continuation [[META1:![0-9]+]] !continuation.registercount [[META2:![0-9]+]] !continuation.stacksize [[META3:![0-9]+]] !continuation.state [[META3]] { +; CHECK-NEXT: AllocaSpillBB: +; CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) +; CHECK-NEXT: [[FRAMEPTR:%.*]] = bitcast ptr addrspace(32) [[TMP0]] to ptr addrspace(32) +; CHECK-NEXT: [[DOTSPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME:%.*]], ptr addrspace(32) [[FRAMEPTR]], i32 0, i32 0 +; CHECK-NEXT: store i64 -1, ptr addrspace(32) [[DOTSPILL_ADDR]], align 4 +; CHECK-NEXT: [[CONT_STATE_STACK_ALLOC:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) +; CHECK-NEXT: call void (i64, ...) @continuation.continue(i64 ptrtoint (ptr @async_fun to i64), i64 ptrtoint (ptr @simple_await.resume.0 to i64)), !continuation.registercount [[META2]], !continuation.returnedRegistercount !2 +; CHECK-NEXT: unreachable +; +AllocaSpillBB: + %FramePtr = bitcast i8* %0 to %simple_await.Frame* + %.spill.addr = getelementptr inbounds %simple_await.Frame, %simple_await.Frame* %FramePtr, i32 0, i32 0 + store i64 -1, i64* %.spill.addr, align 4 + %tok = call %continuation.token* @async_fun(), !continuation.registercount !4, !continuation.returnedRegistercount !4 + %1 = insertvalue { i8*, %continuation.token* } { i8* bitcast ({ i8*, %continuation.token* } (i8*, i1)* @simple_await.resume.0 to i8*), %continuation.token* undef }, %continuation.token* %tok, 1 + ret { i8*, %continuation.token* } %1 +} + +define internal { i8*, %continuation.token* } @simple_await.resume.0(i8* noalias nonnull align 16 dereferenceable(8) %0, i1 %1) !continuation !0 { +; CHECK-LABEL: define dso_local void @simple_await.resume.0( +; CHECK-SAME: ) !continuation [[META1]] !continuation.registercount [[META2]] { +; CHECK-NEXT: entryresume.0: +; CHECK-NEXT: call void 
@lgc.cps.free(i32 8) +; CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) +; CHECK-NEXT: [[FRAMEPTR:%.*]] = bitcast ptr addrspace(32) [[TMP0]] to ptr addrspace(32) +; CHECK-NEXT: [[VFRAME:%.*]] = bitcast ptr addrspace(32) [[FRAMEPTR]] to ptr addrspace(32) +; CHECK-NEXT: [[DOTRELOAD_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME:%.*]], ptr addrspace(32) [[FRAMEPTR]], i32 0, i32 0 +; CHECK-NEXT: [[DOTRELOAD:%.*]] = load i64, ptr addrspace(32) [[DOTRELOAD_ADDR]], align 4 +; CHECK-NEXT: call void (i64, ...) @continuation.continue(i64 [[DOTRELOAD]]), !continuation.registercount [[META2]] +; CHECK-NEXT: unreachable +; +entryresume.0: + %FramePtr = bitcast i8* %0 to %simple_await.Frame* + %vFrame = bitcast %simple_await.Frame* %FramePtr to i8* + %.reload.addr = getelementptr inbounds %simple_await.Frame, %simple_await.Frame* %FramePtr, i32 0, i32 0 + %.reload = load i64, i64* %.reload.addr, align 4 + call void (i64, ...) @continuation.return(i64 %.reload), !continuation.registercount !4 + unreachable +} + +define { i8*, %continuation.token* } @simple_await_entry(i8* %0) !continuation.entry !2 !continuation !3 !continuation.registercount !4 { +; CHECK-LABEL: define void @simple_await_entry( +; CHECK-SAME: ) !continuation [[META4:![0-9]+]] !continuation.registercount [[META2]] !continuation.entry [[META5:![0-9]+]] !continuation.stacksize [[META3]] !continuation.state [[META3]] { +; CHECK-NEXT: AllocaSpillBB: +; CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) +; CHECK-NEXT: [[FRAMEPTR:%.*]] = bitcast ptr addrspace(32) [[TMP0]] to ptr addrspace(32) +; CHECK-NEXT: [[CONT_STATE_STACK_ALLOC:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) +; CHECK-NEXT: call void (i64, ...) 
@continuation.continue(i64 ptrtoint (ptr @async_fun to i64), i64 ptrtoint (ptr @simple_await_entry.resume.0 to i64)), !continuation.registercount [[META2]], !continuation.returnedRegistercount !2 +; CHECK-NEXT: unreachable +; +AllocaSpillBB: + %FramePtr = bitcast i8* %0 to %simple_await_entry.Frame* + %tok = call %continuation.token* @async_fun(), !continuation.registercount !4, !continuation.returnedRegistercount !4 + %1 = bitcast { i8*, %continuation.token* } (i8*, i1)* @simple_await_entry.resume.0 to i8* + %2 = insertvalue { i8*, %continuation.token* } undef, i8* %1, 0 + %3 = insertvalue { i8*, %continuation.token* } %2, %continuation.token* %tok, 1 + ret { i8*, %continuation.token* } %3 +} + +define internal { i8*, %continuation.token* } @simple_await_entry.resume.0(i8* noalias nonnull align 16 dereferenceable(8) %0, i1 %1) !continuation.entry !2 !continuation !3 { +; CHECK-LABEL: define dso_local void @simple_await_entry.resume.0( +; CHECK-SAME: ) !continuation [[META4]] !continuation.registercount [[META2]] { +; CHECK-NEXT: entryresume.0: +; CHECK-NEXT: call void @lgc.cps.free(i32 8) +; CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) +; CHECK-NEXT: [[FRAMEPTR:%.*]] = bitcast ptr addrspace(32) [[TMP0]] to ptr addrspace(32) +; CHECK-NEXT: [[VFRAME:%.*]] = bitcast ptr addrspace(32) [[FRAMEPTR]] to ptr addrspace(32) +; CHECK-NEXT: ret void +; CHECK: entryresume.0.split: +; CHECK-NEXT: unreachable +; +entryresume.0: + %FramePtr = bitcast i8* %0 to %simple_await_entry.Frame* + %vFrame = bitcast %simple_await_entry.Frame* %FramePtr to i8* + call void (i64, ...) 
@continuation.return(i64 undef), !continuation.registercount !4 + unreachable +} + +define { i8*, %continuation.token* } @await_with_ret_value(i8* %0) !continuation !1 !continuation.registercount !4 { +; CHECK-LABEL: define void @await_with_ret_value( +; CHECK-SAME: ) !continuation [[META6:![0-9]+]] !continuation.registercount [[META2]] !continuation.stacksize [[META3]] !continuation.state [[META3]] { +; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) +; CHECK-NEXT: [[FRAMEPTR:%.*]] = bitcast ptr addrspace(32) [[TMP1]] to ptr addrspace(32) +; CHECK-NEXT: [[DOTSPILL_ADDR:%.*]] = getelementptr inbounds [[AWAIT_WITH_RET_VALUE_FRAME:%.*]], ptr addrspace(32) [[FRAMEPTR]], i32 0, i32 0 +; CHECK-NEXT: store i64 -1, ptr addrspace(32) [[DOTSPILL_ADDR]], align 4 +; CHECK-NEXT: [[CONT_STATE_STACK_ALLOC:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) +; CHECK-NEXT: call void (i64, ...) @continuation.continue(i64 ptrtoint (ptr @async_fun to i64), i64 ptrtoint (ptr @await_with_ret_value.resume.0 to i64)), !continuation.registercount [[META2]], !continuation.returnedRegistercount !2 +; CHECK-NEXT: unreachable +; + %FramePtr = bitcast i8* %0 to %await_with_ret_value.Frame* + %.spill.addr = getelementptr inbounds %await_with_ret_value.Frame, %await_with_ret_value.Frame* %FramePtr, i32 0, i32 0 + store i64 -1, i64* %.spill.addr, align 4 + %tok = call %continuation.token* @async_fun(), !continuation.registercount !4, !continuation.returnedRegistercount !4 + %res = insertvalue { i8*, %continuation.token* } { i8* bitcast ({ i8*, %continuation.token* } (i8*, i1)* @await_with_ret_value.resume.0 to i8*), %continuation.token* undef }, %continuation.token* %tok, 1 + ret { i8*, %continuation.token* } %res +} + +define internal { i8*, %continuation.token* } @await_with_ret_value.resume.0(i8* noalias nonnull align 16 dereferenceable(8) %0, i1 %1) !continuation !1 { +; CHECK-LABEL: define dso_local void @await_with_ret_value.resume.0( +; CHECK-SAME: i32 [[RES1:%.*]]) 
!continuation [[META6]] !continuation.registercount [[META2]] { +; CHECK-NEXT: call void @lgc.cps.free(i32 8) +; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) +; CHECK-NEXT: [[FRAMEPTR:%.*]] = bitcast ptr addrspace(32) [[TMP1]] to ptr addrspace(32) +; CHECK-NEXT: [[VFRAME:%.*]] = bitcast ptr addrspace(32) [[FRAMEPTR]] to ptr addrspace(32) +; CHECK-NEXT: [[DOTRELOAD_ADDR:%.*]] = getelementptr inbounds [[AWAIT_WITH_RET_VALUE_FRAME:%.*]], ptr addrspace(32) [[FRAMEPTR]], i32 0, i32 0 +; CHECK-NEXT: [[DOTRELOAD:%.*]] = load i64, ptr addrspace(32) [[DOTRELOAD_ADDR]], align 4 +; CHECK-NEXT: call void (i64, ...) @continuation.continue(i64 [[DOTRELOAD]], i32 [[RES1]]), !continuation.registercount [[META2]] +; CHECK-NEXT: unreachable +; + %FramePtr = bitcast i8* %0 to %await_with_ret_value.Frame* + %vFrame = bitcast %await_with_ret_value.Frame* %FramePtr to i8* + %.reload.addr = getelementptr inbounds %await_with_ret_value.Frame, %await_with_ret_value.Frame* %FramePtr, i32 0, i32 0 + %.reload = load i64, i64* %.reload.addr, align 4 + %res = call i32 @continuations.getReturnValue__i32() + call void (i64, ...) @continuation.return(i64 %.reload, i32 %res), !continuation.registercount !4 + unreachable +} + +attributes #0 = { nounwind } + +!continuation.stackAddrspace = !{!5} + +!0 = !{{ i8*, %continuation.token* } (i8*)* @simple_await} +!1 = !{{ i8*, %continuation.token* } (i8*)* @await_with_ret_value} +!2 = !{} +!3 = !{{ i8*, %continuation.token* } (i8*)* @simple_await_entry} +!4 = !{i32 0} +!5 = !{i32 21} +;. +; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { noreturn } +; CHECK: attributes #[[ATTR2:[0-9]+]] = { nounwind willreturn memory(inaccessiblemem: read) } +; CHECK: attributes #[[ATTR3:[0-9]+]] = { nounwind willreturn memory(inaccessiblemem: readwrite) } +;. 
+; CHECK: [[META0:![0-9]+]] = !{i32 21} +; CHECK: [[META1]] = !{ptr @simple_await} +; CHECK: [[META2]] = !{i32 0} +; CHECK: [[META3]] = !{i32 8} +; CHECK: [[META4]] = !{ptr @simple_await_entry} +; CHECK: [[META5]] = !{} +; CHECK: [[META6]] = !{ptr @await_with_ret_value} +;. diff --git a/shared/continuations/test/dx/closest-hit-procedural.ll b/llvmraytracing/test/dx/closest-hit-procedural.ll similarity index 98% rename from shared/continuations/test/dx/closest-hit-procedural.ll rename to llvmraytracing/test/dx/closest-hit-procedural.ll index 5a625d8ff5..08024ba105 100644 --- a/shared/continuations/test/dx/closest-hit-procedural.ll +++ b/llvmraytracing/test/dx/closest-hit-procedural.ll @@ -1,11 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 ; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,remove-types-metadata' -S %s 2> %t0.stderr | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE %s -; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,save-continuation-state,lint,dxil-cont-post-process,lint,remove-types-metadata' -S %s 2> %t1.stderr | FileCheck -check-prefix=DXILCONTPOSTPROCESS %s +; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' -S %s 2> %t1.stderr | FileCheck -check-prefix=DXILCONTPOSTPROCESS %s ; RUN: count 0 < %t1.stderr ; Check a procedural closest hit shader with hit attributes that does not fit into system data alone -target datalayout 
= "e-m:e-p:64:32-p20:32:32-p21:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" +target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" %dx.types.Handle = type { i8* } %struct.BuiltInTriangleIntersectionAttributes = type { <2 x float> } diff --git a/shared/continuations/test/dx/closest-hit-traceray.ll b/llvmraytracing/test/dx/closest-hit-traceray.ll similarity index 98% rename from shared/continuations/test/dx/closest-hit-traceray.ll rename to llvmraytracing/test/dx/closest-hit-traceray.ll index 95e8a64043..5904ac8f1f 100644 --- a/shared/continuations/test/dx/closest-hit-traceray.ll +++ b/llvmraytracing/test/dx/closest-hit-traceray.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 ; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,remove-types-metadata' -S %s 2> %t0.stderr | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE %s ; RUN: count 0 < %t0.stderr -; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,save-continuation-state,lint,dxil-cont-post-process,lint,remove-types-metadata' -S %s 2> %t1.stderr | FileCheck -check-prefix=DXILCONTPOSTPROCESS %s +; RUN: opt --verify-each 
-passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' -S %s 2> %t1.stderr | FileCheck -check-prefix=DXILCONTPOSTPROCESS %s ; RUN: count 0 < %t1.stderr -target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" +target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" %dx.types.Handle = type { i8* } %struct.BuiltInTriangleIntersectionAttributes = type { <2 x float> } diff --git a/shared/continuations/test/dx/closest-hit.ll b/llvmraytracing/test/dx/closest-hit.ll similarity index 98% rename from shared/continuations/test/dx/closest-hit.ll rename to llvmraytracing/test/dx/closest-hit.ll index b3fd8ca1ff..a373d279c6 100644 --- a/shared/continuations/test/dx/closest-hit.ll +++ b/llvmraytracing/test/dx/closest-hit.ll @@ -2,7 +2,7 @@ ; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,remove-types-metadata' -S %s 2> %t0.stderr | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE %s ; RUN: count 0 < %t0.stderr -target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" +target datalayout = 
"e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" %struct.BuiltInTriangleIntersectionAttributes = type { <2 x float> } %struct.SystemData = type { %struct.DispatchSystemData } diff --git a/shared/continuations/test/dx/continuation-registercount.ll b/llvmraytracing/test/dx/continuation-registercount.ll similarity index 93% rename from shared/continuations/test/dx/continuation-registercount.ll rename to llvmraytracing/test/dx/continuation-registercount.ll index 23be659256..a3964eacd1 100644 --- a/shared/continuations/test/dx/continuation-registercount.ll +++ b/llvmraytracing/test/dx/continuation-registercount.ll @@ -1,21 +1,21 @@ ; RUN: grep -v SKIP_LINE_BY_DEFAULT %s | \ -; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,save-continuation-state,lint,dxil-cont-post-process,lint,remove-types-metadata' -S 2> %t0.stderr | \ +; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' -S 2> %t0.stderr | \ ; RUN: FileCheck -check-prefix=POSTPROCESS-REGCOUNT %s ; RUN: count 0 < %t0.stderr ; ; RUN: grep -v SKIP_LINE_BY_DEFAULT %s | \ -; RUN: opt --verify-each 
-passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,save-continuation-state,lint,dxil-cont-post-process,lint,remove-types-metadata' -S 2> %t1.stderr | \ +; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' -S 2> %t1.stderr | \ ; RUN: FileCheck -check-prefix=POSTPROCESS-REGCOUNT2 %s ; RUN: count 0 < %t1.stderr ; -; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,save-continuation-state,lint,dxil-cont-post-process,lint,remove-types-metadata' -S %s 2> %t2.stderr | \ +; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' -S %s 2> %t2.stderr | \ ; RUN: FileCheck -check-prefix=POSTPROCESS-REGCOUNT-FEWREGS %s ; RUN: count 0 < %t2.stderr ; The order of metadata on functions is non-deterministic, so make two different runs to match both of them. ; The 'grep' commands filter out a metadata node that reduces the payload register count. 
-target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" +target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" %dx.types.Handle = type { i8* } %struct.DispatchSystemData = type { i32 } @@ -145,14 +145,14 @@ define void @mainTrace() { } ; POSTPROCESS-REGCOUNT-DAG: define void @called({{.*}}%struct.DispatchSystemData %0){{.*}} !continuation.registercount ![[called_registercount:[0-9]+]] -; POSTPROCESS-REGCOUNT-DAG: define dso_local void @called.resume.0({{.*}}%struct.DispatchSystemData %1){{.*}} !continuation.registercount ![[called_resume_registercount:[0-9]+]] +; POSTPROCESS-REGCOUNT-DAG: define dso_local void @called.resume.0({{.*}}%struct.DispatchSystemData %0){{.*}} !continuation.registercount ![[called_resume_registercount:[0-9]+]] ; POSTPROCESS-REGCOUNT-DAG: ![[called_registercount]] = !{i32 26} ; POSTPROCESS-REGCOUNT-DAG: ![[called_resume_registercount]] = !{i32 27} ; If we set maxPayloadRegisterCount to 10, both functions use only 10 payload registers. ; Note that due to metadata uniquing, both use the same metadata node. 
; POSTPROCESS-REGCOUNT-FEWREGS-DAG: define void @called({{.*}}%struct.DispatchSystemData %0){{.*}} !continuation.registercount ![[registercount:[0-9]+]] -; POSTPROCESS-REGCOUNT-FEWREGS-DAG: define dso_local void @called.resume.0({{.*}}%struct.DispatchSystemData %1){{.*}} !continuation.registercount ![[registercount]] +; POSTPROCESS-REGCOUNT-FEWREGS-DAG: define dso_local void @called.resume.0({{.*}}%struct.DispatchSystemData %0){{.*}} !continuation.registercount ![[registercount]] ; POSTPROCESS-REGCOUNT-FEWREGS-DAG: ![[registercount]] = !{i32 10} define void @called(%struct.MyParams* %arg) !types !39 { @@ -162,7 +162,7 @@ define void @called(%struct.MyParams* %arg) !types !39 { } ; POSTPROCESS-REGCOUNT-DAG: define void @Intersection({{.*}}%struct.AnyHitTraversalData %0){{.*}} !continuation.registercount ![[intersection_registercount:[0-9]+]] -; POSTPROCESS-REGCOUNT-DAG: define dso_local void @Intersection.resume.0({{.*}}%struct.AnyHitTraversalData %1){{.*}} !continuation.registercount ![[intersection_registercount]] +; POSTPROCESS-REGCOUNT-DAG: define dso_local void @Intersection.resume.0({{.*}}%struct.AnyHitTraversalData %0){{.*}} !continuation.registercount ![[intersection_registercount]] ; POSTPROCESS-REGCOUNT-DAG: call void (i64, ...) @continuation.continue(i64 3, {{.*}} float 4.000000e+00, i32 0, %struct.BuiltInTriangleIntersectionAttributes {{.*}}), !continuation.registercount ![[intersection_registercount]] ; POSTPROCESS-REGCOUNT-DAG: ![[intersection_registercount]] = !{i32 30} @@ -189,14 +189,14 @@ define void @ClosestHit(%struct.RayPayload* noalias nocapture %payload, %struct. declare void @continuation.continue(i64, ...) 
-; POSTPROCESS-REGCOUNT-FEWREGS-DAG: define {{.*}} @_cont_Traversal({{.*}} !continuation.registercount ![[registercount]] +; POSTPROCESS-REGCOUNT-FEWREGS-DAG: define %struct._AmdTraversalResultData @_cont_Traversal({{.*}} !continuation.registercount ![[registercount]] ; ^--- this MD node has value 10 ; POSTPROCESS-REGCOUNT-FEWREGS-DAG: call {{.*}} @continuation.continue({{.*}} !continuation.registercount ![[registercount]] -; POSTPROCESS-REGCOUNT-DAG: define {{.*}} @_cont_Traversal({{.*}} !continuation.registercount ![[intersection_registercount]] +; POSTPROCESS-REGCOUNT-DAG: define %struct._AmdTraversalResultData @_cont_Traversal({{.*}} !continuation.registercount ![[intersection_registercount]] ; ^--- this MD node has value 30 ; POSTPROCESS-REGCOUNT-DAG: call {{.*}} @continuation.continue({{.*}} !continuation.registercount ![[intersection_registercount]] -define void @_cont_Traversal(%struct._AmdTraversalResultData* noalias nocapture sret(%struct._AmdTraversalResultData) %agg.result, i32 %csp, %struct._AmdSystemData* noalias %data) !types !44 { +define void @_cont_Traversal(%struct._AmdTraversalResultData* noalias nocapture sret(%struct._AmdTraversalResultData) %agg.result, %struct._AmdSystemData* noalias %data) !types !44 { call void (i64, ...) 
@continuation.continue(i64 0, i8 addrspace(21)* undef) ret void } @@ -274,7 +274,7 @@ attributes #3 = { nounwind } !41 = !{!"function", !"void", !42, !43} !42 = !{i32 0, %struct.RayPayload poison} !43 = !{i32 0, %struct.BuiltInTriangleIntersectionAttributes poison} -!44 = !{!"function", !"void", !45, i32 poison, !46} +!44 = !{!"function", !"void", !45, !46} !45 = !{i32 0, %struct._AmdTraversalResultData poison} !46 = !{i32 0, %struct._AmdSystemData poison} !47 = !{!"function", !"void", i32 poison, %dx.types.Handle poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, !42} diff --git a/shared/continuations/test/dx/continuation-stacksize.ll b/llvmraytracing/test/dx/continuation-stacksize.ll similarity index 81% rename from shared/continuations/test/dx/continuation-stacksize.ll rename to llvmraytracing/test/dx/continuation-stacksize.ll index d718ea7fae..f3b7b288fd 100644 --- a/shared/continuations/test/dx/continuation-stacksize.ll +++ b/llvmraytracing/test/dx/continuation-stacksize.ll @@ -1,23 +1,13 @@ -; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,remove-types-metadata' -S %s 2> %t0.stderr | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE-STACKSIZE %s +; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' \ +; RUN: -S %s 2> %t0.stderr | FileCheck -check-prefix=POSTPROCESS-STACKSIZE %s ; RUN: count 0 < %t0.stderr - -; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,remove-types-metadata' \ -; 
RUN: -S %s 2> %t1.stderr | FileCheck -check-prefix=CLEANUP-STACKSIZE %s +; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' \ +; RUN: -S %s 2> %t1.stderr | FileCheck -check-prefix=POSTPROCESS-STATESIZE %s ; RUN: count 0 < %t1.stderr -; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,remove-types-metadata' \ -; RUN: -S %s 2> %t2.stderr | FileCheck -check-prefix=CLEANUP-STATESIZE %s -; RUN: count 0 < %t2.stderr - -; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,save-continuation-state,lint,remove-types-metadata' \ -; RUN: -S %s 2> %t3.stderr | FileCheck -check-prefix=SAVESTATE-STACKSIZE %s -; RUN: count 0 < %t3.stderr -; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,save-continuation-state,lint,remove-types-metadata' \ -; RUN: -S %s 2> %t4.stderr | FileCheck -check-prefix=SAVESTATE-STATESIZE %s -; RUN: count 0 < %t4.stderr ; The order of metadata on functions is non-deterministic, so make two different runs to match both of them. 
-target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" +target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" %dx.types.Handle = type { i8* } %struct.DispatchSystemData = type { i32 } @@ -79,15 +69,10 @@ define void @_cont_CallShader(%struct.DispatchSystemData* %data, i32 %0) #0 !typ ; LOWERRAYTRACINGPIPELINE-STACKSIZE-DAG: define void @main(%struct.DispatchSystemData %0){{.*}} !continuation.stacksize ![[main_stacksize:[0-9]+]] ; LOWERRAYTRACINGPIPELINE-STACKSIZE-DAG: ![[main_stacksize]] = !{i32 140} -; CLEANUP-STACKSIZE-DAG: define void @main(%struct.DispatchSystemData %0){{.*}} !continuation.stacksize ![[main_stacksize:[0-9]+]] -; CLEANUP-STACKSIZE-DAG: ![[main_stacksize]] = !{i32 140} -; CLEANUP-STATESIZE-DAG: define void @main(%struct.DispatchSystemData %0){{.*}} !continuation.state ![[main_state:[0-9]+]] -; CLEANUP-STATESIZE-DAG: ![[main_state]] = !{i32 0} - -; SAVESTATE-STACKSIZE-DAG: define void @main(%struct.DispatchSystemData %0){{.*}} !continuation.stacksize ![[main_stacksize:[0-9]+]] -; SAVESTATE-STACKSIZE-DAG: ![[main_stacksize]] = !{i32 140} -; SAVESTATE-STATESIZE-DAG: define void @main(%struct.DispatchSystemData %0){{.*}} !continuation.state ![[main_state:[0-9]+]] -; SAVESTATE-STATESIZE-DAG: ![[main_state]] = !{i32 0} +; POSTPROCESS-STACKSIZE-DAG: define void @main(){{.*}} !continuation.stacksize ![[main_stacksize:[0-9]+]] +; POSTPROCESS-STACKSIZE-DAG: ![[main_stacksize]] = !{i32 140} +; POSTPROCESS-STATESIZE-DAG: define void @main(){{.*}} !continuation.state ![[main_state:[0-9]+]] +; POSTPROCESS-STATESIZE-DAG: ![[main_state]] = !{i32 0} define void @main() { %params = alloca 
%struct.TheirParams, align 4 diff --git a/llvmraytracing/test/dx/continuation-state.ll b/llvmraytracing/test/dx/continuation-state.ll new file mode 100644 index 0000000000..25711f37c8 --- /dev/null +++ b/llvmraytracing/test/dx/continuation-state.ll @@ -0,0 +1,144 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 3 +; RUN: opt --verify-each -passes='lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint' -S %s 2> %t0.stderr | FileCheck -check-prefix=CLEANUP %s +; RUN: count 0 < %t0.stderr +; RUN: opt --verify-each -passes='lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint' \ +; RUN: -S %s 2> %t1.stderr | FileCheck -check-prefix=REGISTERBUFFER %s +; RUN: count 0 < %t1.stderr + +target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" + +%continuation.token = type { } + +declare void @await.void(%continuation.token*) +declare i32 @_cont_GetContinuationStackAddr() +declare %continuation.token* @async_fun() + +@PAYLOAD = external addrspace(20) global [30 x i32] + +define <4 x i32> @simple_await(<4 x i32> %arg) !continuation.registercount !1 { + %tok = call %continuation.token* @async_fun(), !continuation.registercount !1, !continuation.returnedRegistercount !1 + call void @await.void(%continuation.token* %tok) + ret <4 x i32> %arg, !continuation.registercount !1 +} + +define void @simple_await_entry(<4 x i32> %arg, <4 x i32> addrspace(1)* %mem) !continuation.entry !0 !continuation.registercount !1 { + %tok = call %continuation.token* @async_fun(), !continuation.registercount !1, !continuation.returnedRegistercount !1 + call void @await.void(%continuation.token* %tok) + store <4 x i32> %arg, <4 x i32> 
addrspace(1)* %mem + ret void, !continuation.registercount !1 +} + +!continuation.maxPayloadRegisterCount = !{!2} +!continuation.stackAddrspace = !{!3} + +!0 = !{} +!1 = !{i32 0} +!2 = !{i32 30} +!3 = !{i32 21} +; CLEANUP-LABEL: define void @simple_await( +; CLEANUP-SAME: i64 [[RETURNADDR:%.*]], <4 x i32> [[ARG:%.*]]) !continuation.registercount [[META2:![0-9]+]] !continuation [[META3:![0-9]+]] !continuation.stacksize [[META4:![0-9]+]] !continuation.state [[META4]] { +; CLEANUP-NEXT: AllocaSpillBB: +; CLEANUP-NEXT: [[TMP0:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) +; CLEANUP-NEXT: [[ARG_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME:%.*]], ptr addrspace(32) [[TMP0]], i32 0, i32 0 +; CLEANUP-NEXT: store <4 x i32> [[ARG]], ptr addrspace(32) [[ARG_SPILL_ADDR]], align 4 +; CLEANUP-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME]], ptr addrspace(32) [[TMP0]], i32 0, i32 1 +; CLEANUP-NEXT: store i64 [[RETURNADDR]], ptr addrspace(32) [[RETURNADDR_SPILL_ADDR]], align 4 +; CLEANUP-NEXT: [[CONT_STATE_STACK_ALLOC:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 24) +; CLEANUP-NEXT: call void (i64, ...) 
@continuation.continue(i64 ptrtoint (ptr @async_fun to i64), i64 ptrtoint (ptr @simple_await.resume.0 to i64)), !continuation.registercount [[META2]], !continuation.returnedRegistercount !2 +; CLEANUP-NEXT: unreachable +; +; +; CLEANUP-LABEL: define dso_local void @simple_await.resume.0( +; CLEANUP-SAME: ) !continuation.registercount [[META2]] !continuation [[META3]] { +; CLEANUP-NEXT: entryresume.0: +; CLEANUP-NEXT: call void @lgc.cps.free(i32 24) +; CLEANUP-NEXT: [[TMP0:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) +; CLEANUP-NEXT: [[ARG_RELOAD_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME:%.*]], ptr addrspace(32) [[TMP0]], i32 0, i32 0 +; CLEANUP-NEXT: [[ARG_RELOAD:%.*]] = load <4 x i32>, ptr addrspace(32) [[ARG_RELOAD_ADDR]], align 4 +; CLEANUP-NEXT: [[RETURNADDR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME]], ptr addrspace(32) [[TMP0]], i32 0, i32 1 +; CLEANUP-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(32) [[RETURNADDR_RELOAD_ADDR]], align 4 +; CLEANUP-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR_RELOAD]], <4 x i32> [[ARG_RELOAD]]), !continuation.registercount [[META2]] +; CLEANUP-NEXT: unreachable +; +; +; CLEANUP-LABEL: define void @simple_await_entry( +; CLEANUP-SAME: <4 x i32> [[ARG:%.*]], ptr addrspace(1) [[MEM:%.*]]) !continuation.registercount [[META2]] !continuation.entry [[META5:![0-9]+]] !continuation [[META6:![0-9]+]] !continuation.stacksize [[META4]] !continuation.state [[META4]] { +; CLEANUP-NEXT: AllocaSpillBB: +; CLEANUP-NEXT: [[TMP0:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) +; CLEANUP-NEXT: [[MEM_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_ENTRY_FRAME:%.*]], ptr addrspace(32) [[TMP0]], i32 0, i32 1 +; CLEANUP-NEXT: store ptr addrspace(1) [[MEM]], ptr addrspace(32) [[MEM_SPILL_ADDR]], align 4 +; CLEANUP-NEXT: [[ARG_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_ENTRY_FRAME]], ptr addrspace(32) [[TMP0]], i32 0, i32 0 +; CLEANUP-NEXT: store <4 x i32> [[ARG]], ptr addrspace(32) [[ARG_SPILL_ADDR]], align 4 +; CLEANUP-NEXT: [[CONT_STATE_STACK_ALLOC:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 24) +; CLEANUP-NEXT: call void (i64, ...) 
@continuation.continue(i64 ptrtoint (ptr @async_fun to i64), i64 ptrtoint (ptr @simple_await_entry.resume.0 to i64)), !continuation.registercount [[META2]], !continuation.returnedRegistercount !2 +; CLEANUP-NEXT: unreachable +; +; +; CLEANUP-LABEL: define dso_local void @simple_await_entry.resume.0( +; CLEANUP-SAME: ) !continuation.registercount [[META2]] !continuation [[META6]] { +; CLEANUP-NEXT: entryresume.0: +; CLEANUP-NEXT: call void @lgc.cps.free(i32 24) +; CLEANUP-NEXT: [[TMP0:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) +; CLEANUP-NEXT: [[MEM_RELOAD_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_ENTRY_FRAME:%.*]], ptr addrspace(32) [[TMP0]], i32 0, i32 1 +; CLEANUP-NEXT: [[MEM_RELOAD:%.*]] = load ptr addrspace(1), ptr addrspace(32) [[MEM_RELOAD_ADDR]], align 4 +; CLEANUP-NEXT: [[ARG_RELOAD_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_ENTRY_FRAME]], ptr addrspace(32) [[TMP0]], i32 0, i32 0 +; CLEANUP-NEXT: [[ARG_RELOAD:%.*]] = load <4 x i32>, ptr addrspace(32) [[ARG_RELOAD_ADDR]], align 4 +; CLEANUP-NEXT: store <4 x i32> [[ARG_RELOAD]], ptr addrspace(1) [[MEM_RELOAD]], align 4 +; CLEANUP-NEXT: ret void +; CLEANUP: entryresume.0.split: +; CLEANUP-NEXT: unreachable +; +; +; REGISTERBUFFER-LABEL: define void @simple_await( +; REGISTERBUFFER-SAME: i64 [[RETURNADDR:%.*]], <4 x i32> [[ARG:%.*]]) !continuation.registercount [[META2:![0-9]+]] !continuation [[META3:![0-9]+]] !continuation.stacksize [[META4:![0-9]+]] !continuation.state [[META4]] { +; REGISTERBUFFER-NEXT: AllocaSpillBB: +; REGISTERBUFFER-NEXT: [[TMP0:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) +; REGISTERBUFFER-NEXT: [[ARG_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME:%.*]], ptr addrspace(32) [[TMP0]], i32 0, i32 0 +; REGISTERBUFFER-NEXT: store <4 x i32> [[ARG]], ptr addrspace(32) [[ARG_SPILL_ADDR]], align 4 +; REGISTERBUFFER-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME]], ptr addrspace(32) [[TMP0]], i32 0, i32 1 +; 
REGISTERBUFFER-NEXT: store i64 [[RETURNADDR]], ptr addrspace(32) [[RETURNADDR_SPILL_ADDR]], align 4 +; REGISTERBUFFER-NEXT: [[CONT_STATE_STACK_ALLOC:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 24) +; REGISTERBUFFER-NEXT: call void (i64, ...) @continuation.continue(i64 ptrtoint (ptr @async_fun to i64), i64 ptrtoint (ptr @simple_await.resume.0 to i64)), !continuation.registercount [[META2]], !continuation.returnedRegistercount !2 +; REGISTERBUFFER-NEXT: unreachable +; +; +; REGISTERBUFFER-LABEL: define dso_local void @simple_await.resume.0( +; REGISTERBUFFER-SAME: ) !continuation.registercount [[META2]] !continuation [[META3]] { +; REGISTERBUFFER-NEXT: entryresume.0: +; REGISTERBUFFER-NEXT: call void @lgc.cps.free(i32 24) +; REGISTERBUFFER-NEXT: [[TMP0:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) +; REGISTERBUFFER-NEXT: [[ARG_RELOAD_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME:%.*]], ptr addrspace(32) [[TMP0]], i32 0, i32 0 +; REGISTERBUFFER-NEXT: [[ARG_RELOAD:%.*]] = load <4 x i32>, ptr addrspace(32) [[ARG_RELOAD_ADDR]], align 4 +; REGISTERBUFFER-NEXT: [[RETURNADDR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME]], ptr addrspace(32) [[TMP0]], i32 0, i32 1 +; REGISTERBUFFER-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(32) [[RETURNADDR_RELOAD_ADDR]], align 4 +; REGISTERBUFFER-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR_RELOAD]], <4 x i32> [[ARG_RELOAD]]), !continuation.registercount [[META2]] +; REGISTERBUFFER-NEXT: unreachable +; +; +; REGISTERBUFFER-LABEL: define void @simple_await_entry( +; REGISTERBUFFER-SAME: <4 x i32> [[ARG:%.*]], ptr addrspace(1) [[MEM:%.*]]) !continuation.registercount [[META2]] !continuation.entry [[META5:![0-9]+]] !continuation [[META6:![0-9]+]] !continuation.stacksize [[META4]] !continuation.state [[META4]] { +; REGISTERBUFFER-NEXT: AllocaSpillBB: +; REGISTERBUFFER-NEXT: [[TMP0:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) +; REGISTERBUFFER-NEXT: [[MEM_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_ENTRY_FRAME:%.*]], ptr addrspace(32) [[TMP0]], i32 0, i32 1 +; REGISTERBUFFER-NEXT: store ptr addrspace(1) [[MEM]], ptr addrspace(32) [[MEM_SPILL_ADDR]], align 4 +; REGISTERBUFFER-NEXT: [[ARG_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_ENTRY_FRAME]], ptr addrspace(32) [[TMP0]], i32 0, i32 0 +; REGISTERBUFFER-NEXT: store <4 x i32> [[ARG]], ptr addrspace(32) [[ARG_SPILL_ADDR]], align 4 +; REGISTERBUFFER-NEXT: [[CONT_STATE_STACK_ALLOC:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 24) +; REGISTERBUFFER-NEXT: call void (i64, ...) 
@continuation.continue(i64 ptrtoint (ptr @async_fun to i64), i64 ptrtoint (ptr @simple_await_entry.resume.0 to i64)), !continuation.registercount [[META2]], !continuation.returnedRegistercount !2 +; REGISTERBUFFER-NEXT: unreachable +; +; +; REGISTERBUFFER-LABEL: define dso_local void @simple_await_entry.resume.0( +; REGISTERBUFFER-SAME: ) !continuation.registercount [[META2]] !continuation [[META6]] { +; REGISTERBUFFER-NEXT: entryresume.0: +; REGISTERBUFFER-NEXT: call void @lgc.cps.free(i32 24) +; REGISTERBUFFER-NEXT: [[TMP0:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) +; REGISTERBUFFER-NEXT: [[MEM_RELOAD_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_ENTRY_FRAME:%.*]], ptr addrspace(32) [[TMP0]], i32 0, i32 1 +; REGISTERBUFFER-NEXT: [[MEM_RELOAD:%.*]] = load ptr addrspace(1), ptr addrspace(32) [[MEM_RELOAD_ADDR]], align 4 +; REGISTERBUFFER-NEXT: [[ARG_RELOAD_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_ENTRY_FRAME]], ptr addrspace(32) [[TMP0]], i32 0, i32 0 +; REGISTERBUFFER-NEXT: [[ARG_RELOAD:%.*]] = load <4 x i32>, ptr addrspace(32) [[ARG_RELOAD_ADDR]], align 4 +; REGISTERBUFFER-NEXT: store <4 x i32> [[ARG_RELOAD]], ptr addrspace(1) [[MEM_RELOAD]], align 4 +; REGISTERBUFFER-NEXT: ret void +; REGISTERBUFFER: entryresume.0.split: +; REGISTERBUFFER-NEXT: unreachable +; diff --git a/shared/continuations/test/dx/continuation-without-await.ll b/llvmraytracing/test/dx/continuation-without-await.ll similarity index 53% rename from shared/continuations/test/dx/continuation-without-await.ll rename to llvmraytracing/test/dx/continuation-without-await.ll index b42fd10b3d..93e1e702a2 100644 --- a/shared/continuations/test/dx/continuation-without-await.ll +++ b/llvmraytracing/test/dx/continuation-without-await.ll @@ -4,13 +4,16 @@ ; RUN: opt --verify-each 
-passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,remove-types-metadata' \ ; RUN: -S %s 2> %t1.stderr | FileCheck -check-prefix=CLEANUP %s ; RUN: count 0 < %t1.stderr -; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,save-continuation-state,lint,remove-types-metadata' \ -; RUN: -S %s 2> %t2.stderr | FileCheck -check-prefix=SAVESTATE %s +; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,remove-types-metadata' \ +; RUN: -S %s 2> %t2.stderr | FileCheck -check-prefix=REGISTERBUFFER %s ; RUN: count 0 < %t2.stderr +; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' \ +; RUN: -S %s 2> %t3.stderr | FileCheck -check-prefix=POSTPROCESS %s +; RUN: count 0 < %t3.stderr ; @called and @main_no_call must be marked as continuation and end with a continue call to the return address -target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" +target datalayout = 
"e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" %dx.types.Handle = type { i8* } %struct.DispatchSystemData = type { i32 } @@ -127,7 +130,7 @@ attributes #2 = { nounwind } ; ; ; LOWERRAYTRACINGPIPELINE-LABEL: define void @main( -; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META9:![0-9]+]] !continuation.entry [[META19:![0-9]+]] !continuation.registercount [[META9]] !continuation [[META20:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META9:![0-9]+]] !continuation.entry [[META20:![0-9]+]] !continuation.registercount [[META9]] !continuation [[META21:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[PARAMS:%.*]] = alloca [[STRUCT_THEIRPARAMS:%.*]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 @@ -137,7 +140,7 @@ attributes #2 = { nounwind } ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_THEIRPARAMS]], ptr [[PARAMS]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = load [1 x i32], ptr [[TMP2]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [1 x i32] [[TMP3]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = call ptr inttoptr (i64 2 to ptr)([[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I]]), !continuation.registercount [[META21:![0-9]+]], !continuation.returnedRegistercount !21 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = call ptr inttoptr (i64 2 to ptr)([[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I]]), !continuation.registercount [[META22:![0-9]+]], !continuation.returnedRegistercount !22 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = 
call [[STRUCT_DISPATCHSYSTEMDATA]] @await.struct.DispatchSystemData(ptr [[TMP4]]) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_THEIRPARAMS]] poison, ptr [[PARAMS]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_THEIRPARAMS]], ptr [[PARAMS]], i32 0, i32 0 @@ -149,7 +152,7 @@ attributes #2 = { nounwind } ; ; ; LOWERRAYTRACINGPIPELINE-LABEL: define void @main_no_call( -; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META9]] !continuation.entry [[META19]] !continuation.registercount [[META9]] !continuation [[META22:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META9]] !continuation.entry [[META20]] !continuation.registercount [[META9]] !continuation [[META23:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) @@ -157,7 +160,7 @@ attributes #2 = { nounwind } ; ; ; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.DispatchSystemData @called( -; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META23:![0-9]+]] !continuation.registercount [[META24:![0-9]+]] !continuation [[META25:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META24:![0-9]+]] !continuation.registercount [[META18:![0-9]+]] !continuation [[META25:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_MYPARAMS:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 @@ -187,7 +190,7 @@ 
attributes #2 = { nounwind } ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP18]], ptr getelementptr (i32, ptr @PAYLOAD, i64 2), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP19]], !continuation.registercount [[META24]] +; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP19]], !continuation.registercount [[META18]] ; ; ; CLEANUP-LABEL: define i32 @_cont_GetLocalRootIndex( @@ -196,41 +199,41 @@ attributes #2 = { nounwind } ; ; ; CLEANUP-LABEL: define void @main( -; CLEANUP-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META9:![0-9]+]] !continuation.entry [[META19:![0-9]+]] !continuation.registercount [[META9]] !continuation [[META20:![0-9]+]] !continuation.state [[META9]] { +; CLEANUP-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META9:![0-9]+]] !continuation.entry [[META20:![0-9]+]] !continuation.registercount [[META9]] !continuation [[META21:![0-9]+]] !continuation.state [[META9]] { ; CLEANUP-NEXT: AllocaSpillBB: ; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 ; CLEANUP-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; CLEANUP-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT]], 0 ; CLEANUP-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [1 x i32] poison, i32 0, 0 ; CLEANUP-NEXT: store [1 x i32] [[DOTFCA_0_INSERT]], ptr @PAYLOAD, align 4 -; CLEANUP-NEXT: [[TMP1:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CLEANUP-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -; CLEANUP-NEXT: call void (i64, ...) 
@continuation.continue(i64 2, i32 [[TMP2]], i64 ptrtoint (ptr @main.resume.0 to i64), [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]]), !continuation.registercount [[META21:![0-9]+]], !continuation.returnedRegistercount !21 +; CLEANUP-NEXT: call void (i64, ...) @continuation.continue(i64 2, i64 ptrtoint (ptr @main.resume.0 to i64), [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]]), !continuation.registercount [[META22:![0-9]+]], !continuation.returnedRegistercount !22 ; CLEANUP-NEXT: unreachable ; ; ; CLEANUP-LABEL: define dso_local void @main.resume.0( -; CLEANUP-SAME: i32 [[TMP0:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP1:%.*]]) !lgc.rt.shaderstage [[META9]] !continuation.registercount [[META21]] !continuation [[META20]] { +; CLEANUP-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META9]] !continuation.registercount [[META22]] !continuation [[META21]] { ; CLEANUP-NEXT: entryresume.0: -; CLEANUP-NEXT: [[TMP2:%.*]] = load [1 x i32], ptr @PAYLOAD, align 4 -; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT4:%.*]] = extractvalue [1 x i32] [[TMP2]], 0 -; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT1:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP1]], 0 +; CLEANUP-NEXT: [[TMP1:%.*]] = load [1 x i32], ptr @PAYLOAD, align 4 +; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT4:%.*]] = extractvalue [1 x i32] [[TMP1]], 0 +; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT1:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 ; CLEANUP-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) -; CLEANUP-NEXT: call void @continuation.complete() +; CLEANUP-NEXT: ret void +; CLEANUP: entryresume.0.split: ; CLEANUP-NEXT: unreachable ; ; ; CLEANUP-LABEL: define void @main_no_call( -; CLEANUP-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META9]] !continuation.entry [[META19]] !continuation.registercount [[META9]] !continuation [[META22:![0-9]+]] !continuation.state [[META9]] { +; CLEANUP-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) 
!lgc.rt.shaderstage [[META9]] !continuation.entry [[META20]] !continuation.registercount [[META9]] !continuation [[META23:![0-9]+]] !continuation.state [[META9]] { ; CLEANUP-NEXT: AllocaSpillBB: ; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 ; CLEANUP-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) -; CLEANUP-NEXT: call void @continuation.complete() +; CLEANUP-NEXT: ret void +; CLEANUP: AllocaSpillBB.split: ; CLEANUP-NEXT: unreachable ; ; ; CLEANUP-LABEL: define void @called( -; CLEANUP-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META23:![0-9]+]] !continuation.registercount [[META24:![0-9]+]] !continuation [[META25:![0-9]+]] !continuation.state [[META9]] { +; CLEANUP-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META24:![0-9]+]] !continuation.registercount [[META18:![0-9]+]] !continuation [[META25:![0-9]+]] !continuation.state [[META9]] { ; CLEANUP-NEXT: AllocaSpillBB: ; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 ; CLEANUP-NEXT: [[TMP1:%.*]] = load i32, ptr @PAYLOAD, align 4 @@ -243,72 +246,137 @@ attributes #2 = { nounwind } ; CLEANUP-NEXT: store i32 [[TMP2]], ptr getelementptr (i32, ptr @PAYLOAD, i64 1), align 4 ; CLEANUP-NEXT: store i32 [[TMP3]], ptr getelementptr (i32, ptr @PAYLOAD, i64 2), align 4 ; CLEANUP-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT]], 0 -; CLEANUP-NEXT: [[TMP4:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CLEANUP-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -; CLEANUP-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP5]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META24]] +; CLEANUP-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META18]] ; CLEANUP-NEXT: unreachable ; ; -; SAVESTATE-LABEL: define i32 @_cont_GetLocalRootIndex( -; SAVESTATE-SAME: ptr [[DATA:%.*]]) { -; SAVESTATE-NEXT: ret i32 5 -; -; -; SAVESTATE-LABEL: define void @main( -; SAVESTATE-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META8:![0-9]+]] !continuation.entry [[META18:![0-9]+]] !continuation.registercount [[META8]] !continuation [[META19:![0-9]+]] !continuation.state [[META8]] { -; SAVESTATE-NEXT: AllocaSpillBB: -; SAVESTATE-NEXT: [[CSP:%.*]] = alloca i32, align 4 -; SAVESTATE-NEXT: [[TMP1:%.*]] = call i32 @continuation.initialContinuationStackPtr() -; SAVESTATE-NEXT: store i32 [[TMP1]], ptr [[CSP]], align 4 -; SAVESTATE-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 -; SAVESTATE-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) -; SAVESTATE-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT]], 0 -; SAVESTATE-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [1 x i32] poison, i32 0, 0 -; SAVESTATE-NEXT: [[DOTFCA_0_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [1 x i32] [[DOTFCA_0_INSERT]], 0 -; SAVESTATE-NEXT: store i32 [[DOTFCA_0_INSERT_FCA_0_EXTRACT]], ptr addrspace(20) @PAYLOAD, align 4 -; SAVESTATE-NEXT: [[TMP2:%.*]] = load i32, ptr [[CSP]], align 4 -; SAVESTATE-NEXT: call void (i64, ...) 
@continuation.continue(i64 2, i32 [[TMP2]], i64 ptrtoint (ptr @main.resume.0 to i64), [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]]), !continuation.registercount [[META20:![0-9]+]], !continuation.returnedRegistercount !20 -; SAVESTATE-NEXT: unreachable -; -; -; SAVESTATE-LABEL: define dso_local void @main.resume.0( -; SAVESTATE-SAME: i32 [[TMP0:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP1:%.*]]) !lgc.rt.shaderstage [[META8]] !continuation.registercount [[META20]] !continuation [[META19]] { -; SAVESTATE-NEXT: entryresume.0: -; SAVESTATE-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 -; SAVESTATE-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [1 x i32] poison, i32 [[TMP2]], 0 -; SAVESTATE-NEXT: [[DOTFCA_0_EXTRACT4:%.*]] = extractvalue [1 x i32] [[DOTFCA_0_INSERT]], 0 -; SAVESTATE-NEXT: [[DOTFCA_0_EXTRACT1:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP1]], 0 -; SAVESTATE-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) -; SAVESTATE-NEXT: call void @continuation.complete() -; SAVESTATE-NEXT: unreachable -; -; -; SAVESTATE-LABEL: define void @main_no_call( -; SAVESTATE-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META8]] !continuation.entry [[META18]] !continuation.registercount [[META8]] !continuation [[META21:![0-9]+]] !continuation.state [[META8]] { -; SAVESTATE-NEXT: AllocaSpillBB: -; SAVESTATE-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 -; SAVESTATE-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) -; SAVESTATE-NEXT: call void @continuation.complete() -; SAVESTATE-NEXT: unreachable -; -; -; SAVESTATE-LABEL: define void @called( -; SAVESTATE-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META22:![0-9]+]] !continuation.registercount [[META23:![0-9]+]] !continuation [[META24:![0-9]+]] !continuation.state [[META8]] { -; SAVESTATE-NEXT: AllocaSpillBB: -; SAVESTATE-NEXT: [[CSP:%.*]] = 
alloca i32, align 4 -; SAVESTATE-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 -; SAVESTATE-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 -; SAVESTATE-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 -; SAVESTATE-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr (i32, ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i64 1) to ptr addrspace(20)), align 4 -; SAVESTATE-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr (i32, ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i64 2) to ptr addrspace(20)), align 4 -; SAVESTATE-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr)) -; SAVESTATE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; SAVESTATE-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr)) -; SAVESTATE-NEXT: store i32 [[TMP1]], ptr addrspace(20) @PAYLOAD, align 4 -; SAVESTATE-NEXT: store i32 [[TMP2]], ptr addrspace(20) addrspacecast (ptr getelementptr (i32, ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i64 1) to ptr addrspace(20)), align 4 -; SAVESTATE-NEXT: store i32 [[TMP3]], ptr addrspace(20) addrspacecast (ptr getelementptr (i32, ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i64 2) to ptr addrspace(20)), align 4 -; SAVESTATE-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT]], 0 -; SAVESTATE-NEXT: [[TMP4:%.*]] = load i32, ptr [[CSP]], align 4 -; SAVESTATE-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR]], i32 [[TMP4]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META23]] -; SAVESTATE-NEXT: unreachable +; REGISTERBUFFER-LABEL: define i32 @_cont_GetLocalRootIndex( +; REGISTERBUFFER-SAME: ptr [[DATA:%.*]]) { +; REGISTERBUFFER-NEXT: ret i32 5 +; +; +; REGISTERBUFFER-LABEL: define void @main( +; REGISTERBUFFER-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META8:![0-9]+]] !continuation.entry [[META19:![0-9]+]] !continuation.registercount [[META8]] !continuation [[META20:![0-9]+]] !continuation.state [[META8]] { +; REGISTERBUFFER-NEXT: AllocaSpillBB: +; REGISTERBUFFER-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 +; REGISTERBUFFER-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) +; REGISTERBUFFER-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT]], 0 +; REGISTERBUFFER-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [1 x i32] poison, i32 0, 0 +; REGISTERBUFFER-NEXT: [[DOTFCA_0_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [1 x i32] [[DOTFCA_0_INSERT]], 0 +; REGISTERBUFFER-NEXT: store i32 [[DOTFCA_0_INSERT_FCA_0_EXTRACT]], ptr addrspace(20) @PAYLOAD, align 4 +; REGISTERBUFFER-NEXT: call void (i64, ...) 
@continuation.continue(i64 2, i64 ptrtoint (ptr @main.resume.0 to i64), [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]]), !continuation.registercount [[META21:![0-9]+]], !continuation.returnedRegistercount !21 +; REGISTERBUFFER-NEXT: unreachable +; +; +; REGISTERBUFFER-LABEL: define dso_local void @main.resume.0( +; REGISTERBUFFER-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META8]] !continuation.registercount [[META21]] !continuation [[META20]] { +; REGISTERBUFFER-NEXT: entryresume.0: +; REGISTERBUFFER-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; REGISTERBUFFER-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [1 x i32] poison, i32 [[TMP1]], 0 +; REGISTERBUFFER-NEXT: [[DOTFCA_0_EXTRACT4:%.*]] = extractvalue [1 x i32] [[DOTFCA_0_INSERT]], 0 +; REGISTERBUFFER-NEXT: [[DOTFCA_0_EXTRACT1:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 +; REGISTERBUFFER-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) +; REGISTERBUFFER-NEXT: ret void +; REGISTERBUFFER: entryresume.0.split: +; REGISTERBUFFER-NEXT: unreachable +; +; +; REGISTERBUFFER-LABEL: define void @main_no_call( +; REGISTERBUFFER-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META8]] !continuation.entry [[META19]] !continuation.registercount [[META8]] !continuation [[META22:![0-9]+]] !continuation.state [[META8]] { +; REGISTERBUFFER-NEXT: AllocaSpillBB: +; REGISTERBUFFER-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 +; REGISTERBUFFER-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) +; REGISTERBUFFER-NEXT: ret void +; REGISTERBUFFER: AllocaSpillBB.split: +; REGISTERBUFFER-NEXT: unreachable +; +; +; REGISTERBUFFER-LABEL: define void @called( +; REGISTERBUFFER-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META23:![0-9]+]] !continuation.registercount [[META17:![0-9]+]] !continuation [[META24:![0-9]+]] !continuation.state 
[[META8]] { +; REGISTERBUFFER-NEXT: AllocaSpillBB: +; REGISTERBUFFER-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 +; REGISTERBUFFER-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; REGISTERBUFFER-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr (i32, ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i64 1) to ptr addrspace(20)), align 4 +; REGISTERBUFFER-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr (i32, ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i64 2) to ptr addrspace(20)), align 4 +; REGISTERBUFFER-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr)) +; REGISTERBUFFER-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; REGISTERBUFFER-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr)) +; REGISTERBUFFER-NEXT: store i32 [[TMP1]], ptr addrspace(20) @PAYLOAD, align 4 +; REGISTERBUFFER-NEXT: store i32 [[TMP2]], ptr addrspace(20) addrspacecast (ptr getelementptr (i32, ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i64 1) to ptr addrspace(20)), align 4 +; REGISTERBUFFER-NEXT: store i32 [[TMP3]], ptr addrspace(20) addrspacecast (ptr getelementptr (i32, ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i64 2) to ptr addrspace(20)), align 4 +; REGISTERBUFFER-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT]], 0 +; REGISTERBUFFER-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META17]] +; REGISTERBUFFER-NEXT: unreachable +; +; +; POSTPROCESS-LABEL: define i32 @_cont_GetLocalRootIndex( +; POSTPROCESS-SAME: ptr [[DATA:%.*]]) { +; POSTPROCESS-NEXT: ret i32 5 +; +; +; POSTPROCESS-LABEL: define void @main( +; POSTPROCESS-SAME: ) !lgc.rt.shaderstage [[META8:![0-9]+]] !continuation.entry [[META19:![0-9]+]] !continuation.registercount [[META8]] !continuation [[META20:![0-9]+]] !continuation.state [[META8]] { +; POSTPROCESS-NEXT: AllocaSpillBB: +; POSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; POSTPROCESS-NEXT: [[TMP0:%.*]] = call i32 @_cont_GetContinuationStackAddr() +; POSTPROCESS-NEXT: store i32 [[TMP0]], ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[SYSTEM_DATA:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA:%.*]] @_cont_SetupRayGen() +; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[SYSTEM_DATA]], 0 +; POSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) +; POSTPROCESS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT]], 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [1 x i32] poison, i32 0, 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [1 x i32] [[DOTFCA_0_INSERT]], 0 +; POSTPROCESS-NEXT: store i32 [[DOTFCA_0_INSERT_FCA_0_EXTRACT]], ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[TMP2:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @main.resume.0 to i64)) +; POSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 2, i32 [[TMP1]], i64 [[TMP2]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]]), !continuation.registercount [[META21:![0-9]+]], !continuation.returnedRegistercount !21 +; POSTPROCESS-NEXT: unreachable +; +; +; POSTPROCESS-LABEL: define dso_local void @main.resume.0( +; POSTPROCESS-SAME: i32 [[CSPINIT:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META8]] !continuation.registercount [[META21]] !continuation [[META20]] { +; POSTPROCESS-NEXT: entryresume.0: +; POSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; POSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [1 x i32] poison, i32 [[TMP1]], 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT4:%.*]] = extractvalue [1 x i32] [[DOTFCA_0_INSERT]], 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT1:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 +; POSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) +; POSTPROCESS-NEXT: ret void +; POSTPROCESS: entryresume.0.split: +; POSTPROCESS-NEXT: unreachable +; +; +; POSTPROCESS-LABEL: define void @main_no_call( +; POSTPROCESS-SAME: ) !lgc.rt.shaderstage [[META8]] !continuation.entry [[META19]] !continuation.registercount [[META8]] !continuation [[META22:![0-9]+]] !continuation.state [[META8]] { +; POSTPROCESS-NEXT: AllocaSpillBB: +; POSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; POSTPROCESS-NEXT: [[TMP0:%.*]] = call i32 @_cont_GetContinuationStackAddr() +; POSTPROCESS-NEXT: store i32 [[TMP0]], ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[SYSTEM_DATA:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA:%.*]] @_cont_SetupRayGen() +; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[SYSTEM_DATA]], 0 +; POSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) +; POSTPROCESS-NEXT: ret void +; POSTPROCESS: 
AllocaSpillBB.split: +; POSTPROCESS-NEXT: unreachable +; +; +; POSTPROCESS-LABEL: define void @called( +; POSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META23:![0-9]+]] !continuation.registercount [[META17:![0-9]+]] !continuation [[META24:![0-9]+]] !continuation.state [[META8]] { +; POSTPROCESS-NEXT: AllocaSpillBB: +; POSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; POSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 +; POSTPROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr (i32, ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i64 1) to ptr addrspace(20)), align 4 +; POSTPROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr (i32, ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i64 2) to ptr addrspace(20)), align 4 +; POSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; POSTPROCESS-NEXT: store i32 [[TMP1]], ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-NEXT: store i32 [[TMP2]], ptr addrspace(20) addrspacecast (ptr getelementptr (i32, ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i64 1) to ptr addrspace(20)), align 4 +; POSTPROCESS-NEXT: store i32 [[TMP3]], ptr addrspace(20) addrspacecast (ptr getelementptr (i32, ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i64 2) to ptr addrspace(20)), align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT]], 0 +; POSTPROCESS-NEXT: [[TMP4:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR]], i32 [[TMP4]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META17]] +; POSTPROCESS-NEXT: unreachable ; diff --git a/shared/continuations/test/dx/dxil-cont-convert-lgc-rt-op-trace-payload-type.ll b/llvmraytracing/test/dx/dxil-cont-convert-lgc-rt-op-trace-payload-type.ll similarity index 97% rename from shared/continuations/test/dx/dxil-cont-convert-lgc-rt-op-trace-payload-type.ll rename to llvmraytracing/test/dx/dxil-cont-convert-lgc-rt-op-trace-payload-type.ll index baffcd6dbf..0d52591c11 100644 --- a/shared/continuations/test/dx/dxil-cont-convert-lgc-rt-op-trace-payload-type.ll +++ b/llvmraytracing/test/dx/dxil-cont-convert-lgc-rt-op-trace-payload-type.ll @@ -5,7 +5,7 @@ ; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint' -S %s 2> %t2.stderr | FileCheck -check-prefix=PAYLOADTYPE3-OPAQUE %s ; RUN: count 0 < %t2.stderr -target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" +target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" %struct.MyParams = type { [48 x i32] } %struct.TheirParams = type { [64 x i32] } diff --git a/shared/continuations/test/dx/dxil-cont-convert-lgc-rt-op-trace.ll b/llvmraytracing/test/dx/dxil-cont-convert-lgc-rt-op-trace.ll similarity index 97% rename from shared/continuations/test/dx/dxil-cont-convert-lgc-rt-op-trace.ll rename to llvmraytracing/test/dx/dxil-cont-convert-lgc-rt-op-trace.ll index 2c743083e4..2e3e4948b1 100644 --- a/shared/continuations/test/dx/dxil-cont-convert-lgc-rt-op-trace.ll +++ b/llvmraytracing/test/dx/dxil-cont-convert-lgc-rt-op-trace.ll @@ -2,7 +2,7 
@@ ; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint' -S %s 2> %t0.stderr | FileCheck %s ; RUN: count 0 < %t0.stderr -target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" +target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" %struct.MyParams = type { [48 x i32] } %struct.TheirParams = type { [64 x i32] } diff --git a/shared/continuations/test/dx/dxil-cont-convert-lgc-rt-op.ll b/llvmraytracing/test/dx/dxil-cont-convert-lgc-rt-op.ll similarity index 97% rename from shared/continuations/test/dx/dxil-cont-convert-lgc-rt-op.ll rename to llvmraytracing/test/dx/dxil-cont-convert-lgc-rt-op.ll index dd0be64828..3320a0c84a 100644 --- a/shared/continuations/test/dx/dxil-cont-convert-lgc-rt-op.ll +++ b/llvmraytracing/test/dx/dxil-cont-convert-lgc-rt-op.ll @@ -2,7 +2,7 @@ ; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,remove-types-metadata' -S %s 2> %t0.stderr | FileCheck %s ; RUN: count 0 < %t0.stderr -target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" +target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" %struct.DispatchSystemData = type { <3 x i32> } %struct.SystemData = type { %struct.DispatchSystemData } diff --git 
a/shared/continuations/test/dx/dxil-cont-intrinsic-prepare.ll b/llvmraytracing/test/dx/dxil-cont-intrinsic-prepare.ll similarity index 85% rename from shared/continuations/test/dx/dxil-cont-intrinsic-prepare.ll rename to llvmraytracing/test/dx/dxil-cont-intrinsic-prepare.ll index 4da900240e..3d3585152a 100644 --- a/shared/continuations/test/dx/dxil-cont-intrinsic-prepare.ll +++ b/llvmraytracing/test/dx/dxil-cont-intrinsic-prepare.ll @@ -2,7 +2,7 @@ ; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint' -S %s 2> %t.stderr | FileCheck %s ; RUN: count 0 < %t.stderr -target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" +target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" %struct.DispatchSystemData = type { i32 } %struct.TraversalData = type { %struct.SystemData, i32, i64 } @@ -84,7 +84,7 @@ attributes #4 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; ; ; CHECK-LABEL: define %struct.DispatchSystemData @_cont_SetupRayGen( -; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !types [[META0:![0-9]+]] { +; CHECK-SAME: ) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: [[TMP1:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA:%.*]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP1]], i32 0, i32 0 ; CHECK-NEXT: store i32 2, ptr [[TMP2]], align 4 @@ -92,7 +92,7 @@ attributes #4 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[L]], 3 ; CHECK-NEXT: br i1 [[C]], label [[COMPLETE:%.*]], label [[END:%.*]] ; CHECK: complete: -; CHECK-NEXT: call void @continuation.complete() +; CHECK-NEXT: call void 
@_AmdComplete() #[[ATTR5:[0-9]+]] ; CHECK-NEXT: br label [[END]] ; CHECK: end: ; CHECK-NEXT: [[TMP3:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP1]], align 4 @@ -100,27 +100,27 @@ attributes #4 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; ; ; CHECK-LABEL: define %struct.DispatchSystemData @_cont_TraceRay( -; CHECK-SAME: ptr nocapture readonly [[DATA:%.*]], i64 [[ACCELSTRUCT:%.*]], i32 [[RAYFLAGS:%.*]], i32 [[INSTANCEINCLUSIOMASK:%.*]], i32 [[RAYCONTRIBUTIONTOHITGROUPINDEX:%.*]], i32 [[MULTIPLIERFORGEOMETRYCONTRIBUTIONTOSHADERINDEX:%.*]], i32 [[MISSSHADERINDEX:%.*]], float [[ORIGINX:%.*]], float [[ORIGINY:%.*]], float [[ORIGINZ:%.*]], float [[TMIN:%.*]], float [[DIRX:%.*]], float [[DIRY:%.*]], float [[DIRZ:%.*]], float [[TMAX:%.*]]) #[[ATTR1]] !types [[META2:![0-9]+]] { +; CHECK-SAME: ptr nocapture readonly [[DATA:%.*]], i64 [[ACCELSTRUCT:%.*]], i32 [[RAYFLAGS:%.*]], i32 [[INSTANCEINCLUSIOMASK:%.*]], i32 [[RAYCONTRIBUTIONTOHITGROUPINDEX:%.*]], i32 [[MULTIPLIERFORGEOMETRYCONTRIBUTIONTOSHADERINDEX:%.*]], i32 [[MISSSHADERINDEX:%.*]], float [[ORIGINX:%.*]], float [[ORIGINY:%.*]], float [[ORIGINZ:%.*]], float [[TMIN:%.*]], float [[DIRX:%.*]], float [[DIRY:%.*]], float [[DIRZ:%.*]], float [[TMAX:%.*]]) #[[ATTR1]] !types [[META0:![0-9]+]] { ; CHECK-NEXT: [[TMP1:%.*]] = alloca [[STRUCT_TRAVERSALDATA:%.*]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA:%.*]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[DATA]], i32 0, i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast ptr [[TMP1]] to ptr -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[TMP6]]) #[[ATTR6:[0-9]+]] +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[TMP6]]) #[[ATTR5]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr 
[[TMP1]], i32 0, i32 0, i32 0, i32 0 ; CHECK-NEXT: store i32 [[TMP5]], ptr [[TMP7]], align 4 -; CHECK-NEXT: [[TMP8:%.*]] = call i64 @_AmdGetResumePointAddr() +; CHECK-NEXT: [[ADDR:%.*]] = call i64 @_AmdGetResumePointAddr() #[[ATTR5]] ; CHECK-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[TMP1]], i32 0, i32 2 -; CHECK-NEXT: store i64 [[TMP8]], ptr [[A]], align 4 -; CHECK-NEXT: [[TMP9:%.*]] = load [[STRUCT_TRAVERSALDATA]], ptr [[TMP1]], align 4 -; CHECK-NEXT: [[TMP10:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] @_AmdAwait(i64 3, [[STRUCT_TRAVERSALDATA]] [[TMP9]]) -; CHECK-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP10]], ptr [[TMP2]], align 4 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP2]], i32 0, i32 0 -; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP3]], i32 0, i32 0 -; CHECK-NEXT: store i32 [[TMP12]], ptr [[TMP13]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[TMP6]]) #[[ATTR6]] -; CHECK-NEXT: [[TMP14:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP3]], align 4 -; CHECK-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP14]] +; CHECK-NEXT: store i64 [[ADDR]], ptr [[A]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = load [[STRUCT_TRAVERSALDATA]], ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] @_AmdAwait(i64 3, [[STRUCT_TRAVERSALDATA]] [[TMP8]]) +; CHECK-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP9]], ptr [[TMP2]], align 4 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP2]], i32 0, i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP3]], i32 0, i32 0 +; CHECK-NEXT: store i32 [[TMP11]], ptr [[TMP12]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr 
[[TMP6]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP13:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP3]], align 4 +; CHECK-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP13]] ; diff --git a/shared/continuations/test/dx/dxil-cont-post-process-report-sizes.ll b/llvmraytracing/test/dx/dxil-cont-post-process-report-sizes.ll similarity index 83% rename from shared/continuations/test/dx/dxil-cont-post-process-report-sizes.ll rename to llvmraytracing/test/dx/dxil-cont-post-process-report-sizes.ll index 8939df5a18..114fd214c6 100644 --- a/shared/continuations/test/dx/dxil-cont-post-process-report-sizes.ll +++ b/llvmraytracing/test/dx/dxil-cont-post-process-report-sizes.ll @@ -2,7 +2,7 @@ ; RUN: opt --report-payload-register-sizes --verify-each -passes='dxil-cont-post-process,lint,remove-types-metadata' -S %s 2>&1 | FileCheck %s --check-prefix=REPORT-PAYLOAD-SIZES ; RUN: opt --report-system-data-sizes --verify-each -passes='dxil-cont-post-process,lint,remove-types-metadata' -S %s 2>&1 | FileCheck %s --check-prefix=REPORT-SYSTEM-DATA-SIZES -target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" +target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" %struct.DispatchSystemData = type { i32 } %struct.CHSSystemData = type { [100 x i32] } @@ -26,13 +26,13 @@ define void @RayGen(%struct.DispatchSystemData %0) !continuation.entry !0 !conti ; This is needed as fake continuation of RayGen, because we only report continuation state sizes ; if we find a continuation function using !continuation metadata. 
; REPORT-SYSTEM-DATA-SIZES-DAG: Incoming system data of "RayGen.resume.0" (raygeneration) is "struct.DispatchSystemData", size: 4 bytes -define void @RayGen.resume.0(i32 %0, %struct.DispatchSystemData %1) !continuation !3 !lgc.rt.shaderstage !12 { +define void @RayGen.resume.0(%struct.DispatchSystemData %0) !continuation !3 !lgc.rt.shaderstage !12 { ret void } ; REPORT-PAYLOAD-SIZES: Incoming and max outgoing payload VGPR size of "CHS" (closesthit): 32 and 36 bytes ; REPORT-SYSTEM-DATA-SIZES-DAG: Incoming system data of "CHS" (closesthit) is "struct.CHSSystemData", size: 400 bytes -define void @CHS(i32 %cspInit, i64 %returnAddr, %struct.CHSSystemData %0) !continuation !14 !continuation.registercount !8 !lgc.rt.shaderstage !13 { +define void @CHS(i64 %returnAddr, %struct.CHSSystemData %0) !continuation !14 !continuation.registercount !8 !lgc.rt.shaderstage !13 { call void (i64, ...) @continuation.continue(i64 2), !continuation.registercount !9 ret void } diff --git a/llvmraytracing/test/dx/dxil-cont-post-process.ll b/llvmraytracing/test/dx/dxil-cont-post-process.ll new file mode 100644 index 0000000000..b767a069e6 --- /dev/null +++ b/llvmraytracing/test/dx/dxil-cont-post-process.ll @@ -0,0 +1,79 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt --verify-each -passes='dxil-cont-post-process,lint' -S %s 2> %t.stderr | FileCheck %s +; RUN: count 0 < %t.stderr + +target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" + +%struct.DispatchSystemData = type { i32 } + +@debug_global = external global i1 + +declare void @_AmdComplete() #0 +declare i32 @continuation.initialContinuationStackPtr() +declare i32 @_cont_GetContinuationStackAddr() +declare i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData*) + +; Function 
Attrs: nounwind +define %struct.DispatchSystemData @_cont_SetupRayGen() { +; CHECK-LABEL: define %struct.DispatchSystemData @_cont_SetupRayGen() { +; CHECK-NEXT: [[DATA:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA:%.*]] undef, i32 0, 0 +; CHECK-NEXT: [[C:%.*]] = load i1, ptr @debug_global, align 1 +; CHECK-NEXT: br i1 [[C]], label [[COMPLETE:%.*]], label [[END:%.*]] +; CHECK: complete: +; CHECK-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] poison +; CHECK: complete.split: +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] [[DATA]] +; + %data = insertvalue %struct.DispatchSystemData undef, i32 0, 0 + %c = load i1, ptr @debug_global, align 1 + br i1 %c, label %complete, label %end + +complete: ; preds = %0 + call void @_AmdComplete() #3 + br label %end + +end: ; preds = %complete, %0 + ret %struct.DispatchSystemData %data +} + +define void @RayGen(%struct.DispatchSystemData %0) !lgc.rt.shaderstage !5 !continuation.entry !0 !continuation !3 { +; CHECK-LABEL: define void @RayGen( +; CHECK-SAME: ) !lgc.rt.shaderstage [[META3:![0-9]+]] !continuation.entry [[META4:![0-9]+]] !continuation [[META5:![0-9]+]] { +; CHECK-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @_cont_GetContinuationStackAddr() +; CHECK-NEXT: store i32 [[TMP1]], ptr [[CSP]], align 4 +; CHECK-NEXT: [[C_I:%.*]] = load i1, ptr @debug_global, align 1 +; CHECK-NEXT: br i1 [[C_I]], label [[COMPLETE_I:%.*]], label [[_CONT_SETUPRAYGEN_EXIT:%.*]] +; CHECK: complete.i: +; CHECK-NEXT: ret void +; CHECK: complete.i.split: +; CHECK-NEXT: br label [[_CONT_SETUPRAYGEN_EXIT]] +; CHECK: _cont_SetupRayGen.exit: +; CHECK-NEXT: ret void +; + ret void +} + +define void @RayGen.resume.0(%struct.DispatchSystemData %0) !lgc.rt.shaderstage !5 !continuation !3 { +; CHECK-LABEL: define void @RayGen.resume.0( +; CHECK-SAME: i32 [[CSPINIT:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META3]] !continuation [[META5]] { +; 
CHECK-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; CHECK-NEXT: ret void +; + ret void +} + +!dx.entryPoints = !{!1} +!continuation.stackAddrspace = !{!4} + +!0 = !{} +!1 = !{void ()* @RayGen, !"RayGen", null, null, !2} +!2 = !{i32 8, i32 7} +!3 = !{void ()* @RayGen} +!4 = !{i32 21} +!5 = !{i32 0} + +attributes #0 = { nounwind } diff --git a/shared/continuations/test/dx/dxil-cont-prepare-traversal.ll b/llvmraytracing/test/dx/dxil-cont-prepare-traversal.ll similarity index 78% rename from shared/continuations/test/dx/dxil-cont-prepare-traversal.ll rename to llvmraytracing/test/dx/dxil-cont-prepare-traversal.ll index efea1c0981..55317479f3 100644 --- a/shared/continuations/test/dx/dxil-cont-prepare-traversal.ll +++ b/llvmraytracing/test/dx/dxil-cont-prepare-traversal.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 3 ; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,remove-types-metadata' -S %s 2> %t0.stderr | FileCheck --check-prefix=PREPARE %s ; RUN: count 0 < %t0.stderr -; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,save-continuation-state,lint,dxil-cont-post-process,lint,remove-types-metadata' -S %s 2> %t1.stderr | FileCheck --check-prefix=ALL %s +; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' -S %s 2> %t1.stderr | FileCheck --check-prefix=ALL %s ; RUN: count 0 < %t1.stderr -target datalayout = 
"e-m:e-p:64:32-p20:32:32-p21:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" +target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" %struct.TraversalData = type { %struct.SystemData, i32 } %struct.SystemData = type { %struct.DispatchSystemData, float } @@ -80,34 +80,36 @@ attributes #2 = { nounwind } ; PREPARE-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 ; PREPARE-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0 ; PREPARE-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[TMP1]], i32 0, i32 0 -; PREPARE-NEXT: br i1 [[TMP4]], label [[TMP12:%.*]], label [[TMP6:%.*]] +; PREPARE-NEXT: br i1 [[TMP4]], label [[TMP8:%.*]], label [[TMP6:%.*]] ; PREPARE: 6: -; PREPARE-NEXT: [[TMP7:%.*]] = call i32 @_AmdContPayloadRegistersI32Count() -; PREPARE-NEXT: [[TMP8:%.*]] = call i32 @_AmdContPayloadRegistersGetI32(i32 0) +; PREPARE-NEXT: [[I1:%.*]] = call i32 @_AmdContPayloadRegistersI32Count() +; PREPARE-NEXT: [[I2:%.*]] = call i32 @_AmdContPayloadRegistersGetI32(i32 0) ; PREPARE-NEXT: call void @_AmdContPayloadRegistersSetI32(i32 0, i32 1) -; PREPARE-NEXT: [[TMP9:%.*]] = call i32 @_AmdValueI32CountSomething(ptr [[TMP1]]) -; PREPARE-NEXT: [[TMP10:%.*]] = call i32 @_AmdValueGetI32Something(ptr [[TMP1]], i32 0) +; PREPARE-NEXT: [[I3:%.*]] = call i32 @_AmdValueI32CountSomething(ptr [[TMP1]]) +; PREPARE-NEXT: [[I4:%.*]] = call i32 @_AmdValueGetI32Something(ptr [[TMP1]], i32 0) ; PREPARE-NEXT: call void @_AmdValueSetI32Something(ptr [[TMP1]], i32 0, i32 1) ; PREPARE-NEXT: [[A0:%.*]] = zext i1 false to i32 -; PREPARE-NEXT: [[A1:%.*]] = add i32 [[A0]], [[TMP7]] -; PREPARE-NEXT: [[A2:%.*]] = add i32 [[A1]], [[TMP8]] -; 
PREPARE-NEXT: [[A3:%.*]] = add i32 [[A2]], [[TMP9]] -; PREPARE-NEXT: [[A4:%.*]] = add i32 [[A3]], [[TMP10]] +; PREPARE-NEXT: [[A1:%.*]] = add i32 [[A0]], [[I1]] +; PREPARE-NEXT: [[A2:%.*]] = add i32 [[A1]], [[I2]] +; PREPARE-NEXT: [[A3:%.*]] = add i32 [[A2]], [[I3]] +; PREPARE-NEXT: [[A4:%.*]] = add i32 [[A3]], [[I4]] ; PREPARE-NEXT: [[ADDR:%.*]] = zext i32 [[A4]] to i64 -; PREPARE-NEXT: [[TMP11:%.*]] = load [[STRUCT_SYSTEMDATA:%.*]], ptr [[TMP5]], align 4 -; PREPARE-NEXT: call void (i64, i64, ...) @continuation.waitContinue(i64 [[ADDR]], i64 -1, i32 [[STACKPTR]], i64 ptrtoint (ptr @_cont_Traversal to i64), [[STRUCT_SYSTEMDATA]] [[TMP11]]) -; PREPARE-NEXT: br label [[TMP14:%.*]] -; PREPARE: 12: -; PREPARE-NEXT: [[TMP13:%.*]] = load [[STRUCT_SYSTEMDATA]], ptr [[TMP5]], align 4 -; PREPARE-NEXT: call void (i64, i64, ...) @continuation.waitContinue(i64 0, i64 -1, i32 [[STACKPTR]], [[STRUCT_SYSTEMDATA]] [[TMP13]]) -; PREPARE-NEXT: br label [[TMP14]] -; PREPARE: 14: +; PREPARE-NEXT: [[TMP7:%.*]] = load [[STRUCT_SYSTEMDATA:%.*]], ptr [[TMP5]], align 4 +; PREPARE-NEXT: call void (i64, i64, ...) @continuation.waitContinue(i64 [[ADDR]], i64 -1, i32 [[STACKPTR]], i64 ptrtoint (ptr @_cont_Traversal to i64), [[STRUCT_SYSTEMDATA]] [[TMP7]]) +; PREPARE-NEXT: br label [[TMP10:%.*]] +; PREPARE: 8: +; PREPARE-NEXT: [[TMP9:%.*]] = load [[STRUCT_SYSTEMDATA]], ptr [[TMP5]], align 4 +; PREPARE-NEXT: call void (i64, i64, ...) 
@continuation.waitContinue(i64 0, i64 -1, i32 [[STACKPTR]], [[STRUCT_SYSTEMDATA]] [[TMP9]]) +; PREPARE-NEXT: br label [[TMP10]] +; PREPARE: 10: ; PREPARE-NEXT: ret void ; ; ; ALL-LABEL: define void @_cont_Traversal( -; ALL-SAME: i32 [[STACKPTR:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[DATA:%.*]]) #[[ATTR0:[0-9]+]] !lgc.rt.shaderstage [[META2:![0-9]+]] !continuation [[META3:![0-9]+]] !continuation.registercount [[META0:![0-9]+]] { +; ALL-SAME: i32 [[CSPINIT:%.*]], i32 [[STACKPTR:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[DATA:%.*]]) #[[ATTR0:[0-9]+]] !lgc.rt.shaderstage [[META2:![0-9]+]] !continuation [[META3:![0-9]+]] !continuation.registercount [[META0:![0-9]+]] { ; ALL-NEXT: [[TMP1:%.*]] = alloca [[STRUCT_TRAVERSALDATA]], align 8 +; ALL-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; ALL-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; ALL-NEXT: [[DATA_FCA_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[DATA]], 0, 0, 0 ; ALL-NEXT: [[DATA_FCA_0_0_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[TMP1]], i32 0, i32 0, i32 0, i32 0 ; ALL-NEXT: store i32 [[DATA_FCA_0_0_0_EXTRACT]], ptr [[DATA_FCA_0_0_0_GEP]], align 4 @@ -121,7 +123,7 @@ attributes #2 = { nounwind } ; ALL-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 ; ALL-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0 ; ALL-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[TMP1]], i32 0, i32 0 -; ALL-NEXT: br i1 [[TMP4]], label [[TMP12:%.*]], label [[TMP6:%.*]] +; ALL-NEXT: br i1 [[TMP4]], label [[TMP13:%.*]], label [[TMP6:%.*]] ; ALL: 6: ; ALL-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 ; ALL-NEXT: store i32 1, ptr addrspace(20) @REGISTERS, align 4 @@ -141,18 +143,20 @@ attributes #2 = { nounwind } ; ALL-NEXT: [[DOTFCA_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[TMP5]], i32 0, i32 1 ; ALL-NEXT: [[DOTFCA_1_LOAD:%.*]] = load float, ptr [[DOTFCA_1_GEP]], align 4 ; ALL-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue 
[[STRUCT_SYSTEMDATA]] [[DOTFCA_0_0_INSERT]], float [[DOTFCA_1_LOAD]], 1 -; ALL-NEXT: [[TMP11:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @_cont_Traversal to i64)) -; ALL-NEXT: call void (i64, i64, ...) @continuation.waitContinue(i64 [[ADDR]], i64 -1, i32 [[STACKPTR]], i64 [[TMP11]], [[STRUCT_SYSTEMDATA]] [[DOTFCA_1_INSERT]]), !continuation.registercount [[META0]] -; ALL-NEXT: br label [[TMP13:%.*]] -; ALL: 12: +; ALL-NEXT: [[TMP11:%.*]] = load i32, ptr [[CSP]], align 4 +; ALL-NEXT: [[TMP12:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @_cont_Traversal to i64)) +; ALL-NEXT: call void (i64, i64, ...) @continuation.waitContinue(i64 [[ADDR]], i64 -1, i32 [[TMP11]], i32 [[STACKPTR]], i64 [[TMP12]], [[STRUCT_SYSTEMDATA]] [[DOTFCA_1_INSERT]]), !continuation.registercount [[META0]] +; ALL-NEXT: br label [[TMP15:%.*]] +; ALL: 13: ; ALL-NEXT: [[DOTFCA_0_0_GEP1:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[TMP5]], i32 0, i32 0, i32 0 ; ALL-NEXT: [[DOTFCA_0_0_LOAD2:%.*]] = load i32, ptr [[DOTFCA_0_0_GEP1]], align 4 ; ALL-NEXT: [[DOTFCA_0_0_INSERT3:%.*]] = insertvalue [[STRUCT_SYSTEMDATA]] poison, i32 [[DOTFCA_0_0_LOAD2]], 0, 0 ; ALL-NEXT: [[DOTFCA_1_GEP4:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[TMP5]], i32 0, i32 1 ; ALL-NEXT: [[DOTFCA_1_LOAD5:%.*]] = load float, ptr [[DOTFCA_1_GEP4]], align 4 ; ALL-NEXT: [[DOTFCA_1_INSERT6:%.*]] = insertvalue [[STRUCT_SYSTEMDATA]] [[DOTFCA_0_0_INSERT3]], float [[DOTFCA_1_LOAD5]], 1 -; ALL-NEXT: call void (i64, i64, ...) @continuation.waitContinue(i64 0, i64 -1, i32 [[STACKPTR]], [[STRUCT_SYSTEMDATA]] [[DOTFCA_1_INSERT6]]), !continuation.registercount [[META0]] -; ALL-NEXT: br label [[TMP13]] -; ALL: 13: +; ALL-NEXT: [[TMP14:%.*]] = load i32, ptr [[CSP]], align 4 +; ALL-NEXT: call void (i64, i64, ...) 
@continuation.waitContinue(i64 0, i64 -1, i32 [[TMP14]], i32 [[STACKPTR]], [[STRUCT_SYSTEMDATA]] [[DOTFCA_1_INSERT6]]), !continuation.registercount [[META0]] +; ALL-NEXT: br label [[TMP15]] +; ALL: 15: ; ALL-NEXT: ret void ; diff --git a/llvmraytracing/test/dx/dxil-cps-stack-lowering-global.ll b/llvmraytracing/test/dx/dxil-cps-stack-lowering-global.ll new file mode 100644 index 0000000000..814981a812 --- /dev/null +++ b/llvmraytracing/test/dx/dxil-cps-stack-lowering-global.ll @@ -0,0 +1,205 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 3 +; RUN: opt --verify-each -passes='dxil-cont-post-process,lint,remove-types-metadata' -S %s 2> %t0.stderr | FileCheck -check-prefix=CPS-STACK-LOWERING-CPS %s +; RUN: count 0 < %t0.stderr + +target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" + +%dx.types.Handle = type { ptr } +%struct.DispatchSystemData = type { i32 } +%struct.TraversalData = type { %struct.SystemData } +%struct.SystemData = type { %struct.DispatchSystemData } +%struct.BuiltInTriangleIntersectionAttributes = type { <2 x float> } +%called.Frame = type { i32 } +%struct.type = type { <2 x float> } + +@"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A" = external constant %dx.types.Handle, align 4 +@PAYLOAD = external addrspace(20) global [30 x i32] + +declare i32 @_cont_GetContinuationStackAddr() + +declare %struct.DispatchSystemData @_cont_SetupRayGen() + +declare %struct.DispatchSystemData @_AmdAwaitTraversal(i64, %struct.TraversalData) + +declare %struct.DispatchSystemData @_AmdAwaitShader(i64, %struct.DispatchSystemData) + +declare %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(ptr) + +declare void @_AmdRestoreSystemData(ptr) + +define i32 
@_cont_GetLocalRootIndex(ptr %data) { + ret i32 5 +} + +declare i64 @_cont_GetContinuationStackGlobalMemBase() + +define void @called(%struct.type %cont.state, i32 %return.addr, i32 %shader.index, %struct.DispatchSystemData %0) !lgc.rt.shaderstage !15 !lgc.cps !16 !continuation !17 { +AllocaSpillBB: + %1 = call ptr addrspace(32) @lgc.cps.alloc(i32 8) + %return.addr.spill.addr = getelementptr inbounds %called.Frame, ptr addrspace(32) %1, i32 0, i32 0 + store i32 %return.addr, ptr addrspace(32) %return.addr.spill.addr, align 4 + %2 = call %struct.DispatchSystemData @continuations.getSystemData.s_struct.DispatchSystemDatas() + %.fca.0.extract = extractvalue %struct.DispatchSystemData %2, 0 + call void @amd.dx.setLocalRootIndex(i32 5) + %ptr = getelementptr i8, ptr addrspace(32) %1, i32 9 + store i32 99, ptr addrspace(32) %ptr + %dis_data.i.fca.0.insert = insertvalue %struct.DispatchSystemData poison, i32 %.fca.0.extract, 0 + store i32 undef, ptr addrspace(20) @PAYLOAD, align 4 + %3 = call i32 (...) @lgc.cps.as.continuation.reference(ptr @called.resume.0) + call void (...) @lgc.cps.jump(i32 2, i32 2, %struct.type %cont.state, i32 %3, %struct.DispatchSystemData %dis_data.i.fca.0.insert), !continuation.registercount !16 + unreachable +} + +define void @called.resume.0({} %cont.state, i32 %returnAddr, %struct.type %0, %struct.DispatchSystemData %1) !lgc.rt.shaderstage !15 !lgc.cps !16 !continuation !17 { +entryresume.0: + %2 = call ptr addrspace(32) @lgc.cps.peek(i32 8) + %3 = load i32, ptr addrspace(20) @PAYLOAD, align 4 + %4 = extractvalue %struct.type %0, 0 + %.fca.0.extract3 = extractvalue %struct.DispatchSystemData %1, 0 + call void @amd.dx.setLocalRootIndex(i32 5) + %return.addr.reload.addr = getelementptr inbounds %called.Frame, ptr addrspace(32) %2, i32 0, i32 0 + %return.addr.reload = load i32, ptr addrspace(32) %return.addr.reload.addr, align 4 + call void (...) 
@registerbuffer.setpointerbarrier(ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr)) + store i32 %3, ptr addrspace(20) @PAYLOAD, align 4 + %.fca.0.insert = insertvalue %struct.DispatchSystemData poison, i32 %.fca.0.extract3, 0 + call void @lgc.cps.free(i32 8) + call void (...) @lgc.cps.jump(i32 %return.addr.reload, i32 2, %struct.type %0, %struct.DispatchSystemData %.fca.0.insert), !continuation.registercount !16 + unreachable +} + +; Function Attrs: nofree nounwind willreturn +declare void @amd.dx.setLocalRootIndex(i32) #0 + +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) +declare void @registerbuffer.setpointerbarrier(...) #1 + +; Function Attrs: nounwind willreturn +declare %struct.DispatchSystemData @continuations.getSystemData.s_struct.DispatchSystemDatas() #2 + +; Function Attrs: noreturn +declare void @lgc.cps.jump(...) #3 + +; Function Attrs: nounwind willreturn +declare %struct.DispatchSystemData @lgc.cps.await.s_struct.DispatchSystemDatas(...) #2 + +declare !continuation !17 { ptr, ptr } @continuation.prototype.called(ptr, i1) + +declare ptr @continuation.malloc(i32) + +declare void @continuation.free(ptr) + +; Function Attrs: nounwind +declare token @llvm.coro.id.retcon(i32, i32, ptr, ptr, ptr, ptr) #4 + +; Function Attrs: nounwind +declare ptr @llvm.coro.begin(token, ptr writeonly) #4 + +; Function Attrs: nounwind +declare i1 @llvm.coro.suspend.retcon.i1(...) #4 + +; Function Attrs: nounwind willreturn +declare %struct.DispatchSystemData @continuations.getReturnValue.s_struct.DispatchSystemDatas() #2 + +; Function Attrs: noreturn +declare void @continuation.return(...) #3 + +; Function Attrs: nounwind willreturn memory(inaccessiblemem: readwrite) +declare ptr addrspace(32) @lgc.cps.alloc(i32) #5 + +; Function Attrs: nounwind willreturn +declare i32 @lgc.cps.as.continuation.reference(...) 
#2 + +; Function Attrs: nounwind willreturn memory(inaccessiblemem: read) +declare ptr addrspace(32) @lgc.cps.peek(i32) #6 + +; Function Attrs: nounwind willreturn memory(inaccessiblemem: readwrite) +declare void @lgc.cps.free(i32) #5 + +attributes #0 = { nofree nounwind willreturn } +attributes #1 = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +attributes #2 = { nounwind willreturn } +attributes #3 = { noreturn } +attributes #4 = { nounwind } +attributes #5 = { nounwind willreturn memory(inaccessiblemem: readwrite) } +attributes #6 = { nounwind willreturn memory(inaccessiblemem: read) } + +!llvm.ident = !{!0} +!dx.version = !{!1} +!dx.valver = !{!1} +!dx.shaderModel = !{!2} +!dx.entryPoints = !{!3, !6} +!lgc.cps.module = !{} +!continuation.maxPayloadRegisterCount = !{!13} +!continuation.stackAddrspace = !{!14} + +!0 = !{!"clang version 3.7.0 (tags/RELEASE_370/final)"} +!1 = !{i32 1, i32 6} +!2 = !{!"lib", i32 6, i32 6} +!3 = !{null, !"", null, !4, !12} +!4 = !{!5, !9, null, null} +!5 = !{!6} +!6 = !{ptr @called, !"called", null, null, !7} +!7 = !{i32 8, i32 12, i32 6, i32 16, i32 7, i32 8, i32 5, !8} +!8 = !{i32 0} +!9 = !{!10} +!10 = !{i32 0, ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", !"RenderTarget", i32 0, i32 0, i32 1, i32 2, i1 false, i1 false, i1 false, !11} +!11 = !{i32 0, i32 9} +!12 = !{i32 0, i64 65536} +!13 = !{i32 30} +!14 = !{i32 22} +!15 = !{i32 5} +!16 = !{i32 1} +!17 = !{ptr @called} +; CPS-STACK-LOWERING-CPS-LABEL: define i32 @_cont_GetLocalRootIndex( +; CPS-STACK-LOWERING-CPS-SAME: ptr [[DATA:%.*]]) { +; CPS-STACK-LOWERING-CPS-NEXT: ret i32 5 +; +; +; CPS-STACK-LOWERING-CPS-LABEL: define void @called( +; CPS-STACK-LOWERING-CPS-SAME: [[STRUCT_TYPE:%.*]] [[CONT_STATE:%.*]], i32 [[CSPINIT:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META15:![0-9]+]] !lgc.cps [[META16:![0-9]+]] !continuation [[META17:![0-9]+]] { +; 
CPS-STACK-LOWERING-CPS-NEXT: AllocaSpillBB: +; CPS-STACK-LOWERING-CPS-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; CPS-STACK-LOWERING-CPS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; CPS-STACK-LOWERING-CPS-NEXT: [[TMP1:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() +; CPS-STACK-LOWERING-CPS-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(22) +; CPS-STACK-LOWERING-CPS-NEXT: [[TMP3:%.*]] = load i32, ptr [[CSP]], align 4 +; CPS-STACK-LOWERING-CPS-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 8 +; CPS-STACK-LOWERING-CPS-NEXT: store i32 [[TMP4]], ptr [[CSP]], align 4 +; CPS-STACK-LOWERING-CPS-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP3]] +; CPS-STACK-LOWERING-CPS-NEXT: store i32 [[RETURN_ADDR]], ptr addrspace(22) [[TMP5]], align 4 +; CPS-STACK-LOWERING-CPS-NEXT: [[TMP6:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] @continuations.getSystemData.s_struct.DispatchSystemDatas() +; CPS-STACK-LOWERING-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP6]], 0 +; CPS-STACK-LOWERING-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; CPS-STACK-LOWERING-CPS-NEXT: [[TMP7:%.*]] = add i32 [[TMP3]], 9 +; CPS-STACK-LOWERING-CPS-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP7]] +; CPS-STACK-LOWERING-CPS-NEXT: store i32 99, ptr addrspace(22) [[TMP8]], align 4 +; CPS-STACK-LOWERING-CPS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT]], 0 +; CPS-STACK-LOWERING-CPS-NEXT: store i32 undef, ptr addrspace(20) @REGISTERS, align 4 +; CPS-STACK-LOWERING-CPS-NEXT: [[TMP9:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference(ptr @called.resume.0) +; CPS-STACK-LOWERING-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 2, i32 2, [[STRUCT_TYPE]] [[CONT_STATE]], i32 [[TMP9]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]]), !continuation.registercount [[META16]] +; CPS-STACK-LOWERING-CPS-NEXT: unreachable +; +; +; CPS-STACK-LOWERING-CPS-LABEL: define void @called.resume.0( +; CPS-STACK-LOWERING-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[CSPINIT:%.*]], i32 [[RETURNADDR:%.*]], [[STRUCT_TYPE:%.*]] [[TMP0:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP1:%.*]]) !lgc.rt.shaderstage [[META15]] !lgc.cps [[META16]] !continuation [[META17]] { +; CPS-STACK-LOWERING-CPS-NEXT: entryresume.0: +; CPS-STACK-LOWERING-CPS-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; CPS-STACK-LOWERING-CPS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; CPS-STACK-LOWERING-CPS-NEXT: [[TMP2:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() +; CPS-STACK-LOWERING-CPS-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr addrspace(22) +; CPS-STACK-LOWERING-CPS-NEXT: [[TMP4:%.*]] = load i32, ptr [[CSP]], align 4 +; CPS-STACK-LOWERING-CPS-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], -8 +; CPS-STACK-LOWERING-CPS-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; CPS-STACK-LOWERING-CPS-NEXT: [[TMP7:%.*]] = extractvalue [[STRUCT_TYPE]] [[TMP0]], 0 +; CPS-STACK-LOWERING-CPS-NEXT: [[DOTFCA_0_EXTRACT3:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP1]], 0 +; CPS-STACK-LOWERING-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; CPS-STACK-LOWERING-CPS-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP5]] +; CPS-STACK-LOWERING-CPS-NEXT: [[RETURN_ADDR_RELOAD:%.*]] = load i32, ptr addrspace(22) [[TMP8]], align 4 +; CPS-STACK-LOWERING-CPS-NEXT: store i32 [[TMP6]], ptr addrspace(20) @REGISTERS, align 4 +; CPS-STACK-LOWERING-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT3]], 0 +; CPS-STACK-LOWERING-CPS-NEXT: [[TMP9:%.*]] = load i32, ptr [[CSP]], align 4 +; CPS-STACK-LOWERING-CPS-NEXT: 
[[TMP10:%.*]] = add i32 [[TMP9]], -8 +; CPS-STACK-LOWERING-CPS-NEXT: store i32 [[TMP10]], ptr [[CSP]], align 4 +; CPS-STACK-LOWERING-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR_RELOAD]], i32 2, [[STRUCT_TYPE]] [[TMP0]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META16]] +; CPS-STACK-LOWERING-CPS-NEXT: unreachable +; diff --git a/llvmraytracing/test/dx/dxil-cps-stack-lowering-scratch.ll b/llvmraytracing/test/dx/dxil-cps-stack-lowering-scratch.ll new file mode 100644 index 0000000000..9f0d0f79cb --- /dev/null +++ b/llvmraytracing/test/dx/dxil-cps-stack-lowering-scratch.ll @@ -0,0 +1,204 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 3 +; RUN: opt --verify-each -passes='dxil-cont-post-process,lint,remove-types-metadata' -S %s 2> %t0.stderr | FileCheck -check-prefix=CPS-STACK-LOWERING-CPS %s +; RUN: count 0 < %t0.stderr + +target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" + +%dx.types.Handle = type { ptr } +%struct.DispatchSystemData = type { i32 } +%struct.TraversalData = type { %struct.SystemData } +%struct.SystemData = type { %struct.DispatchSystemData } +%struct.BuiltInTriangleIntersectionAttributes = type { <2 x float> } +%called.Frame = type { i32 } +%struct.type = type { <2 x float> } + +@"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A" = external constant %dx.types.Handle, align 4 +@PAYLOAD = external addrspace(20) global [30 x i32] + +declare i32 @_cont_GetContinuationStackAddr() + +declare %struct.DispatchSystemData @_cont_SetupRayGen() + +declare %struct.DispatchSystemData @_AmdAwaitTraversal(i64, %struct.TraversalData) + +declare %struct.DispatchSystemData @_AmdAwaitShader(i64, %struct.DispatchSystemData) + +declare 
%struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(ptr) + +declare void @_AmdRestoreSystemData(ptr) + +define i32 @_cont_GetLocalRootIndex(ptr %data) { + ret i32 5 +} + +declare i64 @_cont_GetContinuationStackGlobalMemBase() + +define void @called(%struct.type %cont.state, i32 %return.addr, i32 %shader.index, %struct.DispatchSystemData %0) !lgc.rt.shaderstage !15 !lgc.cps !16 !continuation !17 { +AllocaSpillBB: + %1 = call ptr addrspace(32) @lgc.cps.alloc(i32 8) + %return.addr.spill.addr = getelementptr inbounds %called.Frame, ptr addrspace(32) %1, i32 0, i32 0 + store i32 %return.addr, ptr addrspace(32) %return.addr.spill.addr, align 4 + %2 = call %struct.DispatchSystemData @continuations.getSystemData.s_struct.DispatchSystemDatas() + %.fca.0.extract = extractvalue %struct.DispatchSystemData %2, 0 + call void @amd.dx.setLocalRootIndex(i32 5) + %ptr = getelementptr i8, ptr addrspace(32) %1, i32 9 + store i32 99, ptr addrspace(32) %ptr + %dis_data.i.fca.0.insert = insertvalue %struct.DispatchSystemData poison, i32 %.fca.0.extract, 0 + store i32 undef, ptr addrspace(20) @PAYLOAD, align 4 + %3 = call i32 (...) @lgc.cps.as.continuation.reference(ptr @called.resume.0) + call void (...) 
@lgc.cps.jump(i32 2, i32 2, %struct.type %cont.state, i32 %3, %struct.DispatchSystemData %dis_data.i.fca.0.insert), !continuation.registercount !16 + unreachable +} + +define void @called.resume.0({} %cont.state, i32 %returnAddr, %struct.type %0, %struct.DispatchSystemData %1) !lgc.rt.shaderstage !15 !lgc.cps !16 !continuation !17 { +entryresume.0: + %2 = call ptr addrspace(32) @lgc.cps.peek(i32 8) + %3 = load i32, ptr addrspace(20) @PAYLOAD, align 4 + %4 = extractvalue %struct.type %0, 0 + %.fca.0.extract3 = extractvalue %struct.DispatchSystemData %1, 0 + call void @amd.dx.setLocalRootIndex(i32 5) + %return.addr.reload.addr = getelementptr inbounds %called.Frame, ptr addrspace(32) %2, i32 0, i32 0 + %return.addr.reload = load i32, ptr addrspace(32) %return.addr.reload.addr, align 4 + call void (...) @registerbuffer.setpointerbarrier(ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr)) + store i32 %3, ptr addrspace(20) @PAYLOAD, align 4 + %.fca.0.insert = insertvalue %struct.DispatchSystemData poison, i32 %.fca.0.extract3, 0 + call void @lgc.cps.free(i32 8) + call void (...) @lgc.cps.jump(i32 %return.addr.reload, i32 2, %struct.type %0, %struct.DispatchSystemData %.fca.0.insert), !continuation.registercount !16 + unreachable +} + +; Function Attrs: nofree nounwind willreturn +declare void @amd.dx.setLocalRootIndex(i32) #0 + +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) +declare void @registerbuffer.setpointerbarrier(...) #1 + +; Function Attrs: nounwind willreturn +declare %struct.DispatchSystemData @continuations.getSystemData.s_struct.DispatchSystemDatas() #2 + +; Function Attrs: noreturn +declare void @lgc.cps.jump(...) #3 + +; Function Attrs: nounwind willreturn +declare %struct.DispatchSystemData @lgc.cps.await.s_struct.DispatchSystemDatas(...) 
#2 + +declare !continuation !17 { ptr, ptr } @continuation.prototype.called(ptr, i1) + +declare ptr @continuation.malloc(i32) + +declare void @continuation.free(ptr) + +; Function Attrs: nounwind +declare token @llvm.coro.id.retcon(i32, i32, ptr, ptr, ptr, ptr) #4 + +; Function Attrs: nounwind +declare ptr @llvm.coro.begin(token, ptr writeonly) #4 + +; Function Attrs: nounwind +declare i1 @llvm.coro.suspend.retcon.i1(...) #4 + +; Function Attrs: nounwind willreturn +declare %struct.DispatchSystemData @continuations.getReturnValue.s_struct.DispatchSystemDatas() #2 + +; Function Attrs: noreturn +declare void @continuation.return(...) #3 + +; Function Attrs: nounwind willreturn memory(inaccessiblemem: readwrite) +declare ptr addrspace(32) @lgc.cps.alloc(i32) #5 + +; Function Attrs: nounwind willreturn +declare i32 @lgc.cps.as.continuation.reference(...) #2 + +; Function Attrs: nounwind willreturn memory(inaccessiblemem: read) +declare ptr addrspace(32) @lgc.cps.peek(i32) #6 + +; Function Attrs: nounwind willreturn memory(inaccessiblemem: readwrite) +declare void @lgc.cps.free(i32) #5 + +attributes #0 = { nofree nounwind willreturn } +attributes #1 = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +attributes #2 = { nounwind willreturn } +attributes #3 = { noreturn } +attributes #4 = { nounwind } +attributes #5 = { nounwind willreturn memory(inaccessiblemem: readwrite) } +attributes #6 = { nounwind willreturn memory(inaccessiblemem: read) } + +!llvm.ident = !{!0} +!dx.version = !{!1} +!dx.valver = !{!1} +!dx.shaderModel = !{!2} +!dx.entryPoints = !{!3, !6} +!lgc.cps.module = !{} +!continuation.maxPayloadRegisterCount = !{!13} +!continuation.stackAddrspace = !{!14} + +!0 = !{!"clang version 3.7.0 (tags/RELEASE_370/final)"} +!1 = !{i32 1, i32 6} +!2 = !{!"lib", i32 6, i32 6} +!3 = !{null, !"", null, !4, !12} +!4 = !{!5, !9, null, null} +!5 = !{!6} +!6 = !{ptr @called, !"called", null, null, !7} +!7 = !{i32 8, i32 12, i32 6, i32 16, i32 7, i32 8, i32 
5, !8} +!8 = !{i32 0} +!9 = !{!10} +!10 = !{i32 0, ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", !"RenderTarget", i32 0, i32 0, i32 1, i32 2, i1 false, i1 false, i1 false, !11} +!11 = !{i32 0, i32 9} +!12 = !{i32 0, i64 65536} +!13 = !{i32 30} +!14 = !{i32 21} +!15 = !{i32 5} +!16 = !{i32 1} +!17 = !{ptr @called} +; CPS-STACK-LOWERING-CPS-LABEL: define i32 @_cont_GetLocalRootIndex( +; CPS-STACK-LOWERING-CPS-SAME: ptr [[DATA:%.*]]) { +; CPS-STACK-LOWERING-CPS-NEXT: ret i32 5 +; +; +; CPS-STACK-LOWERING-CPS-LABEL: define void @called( +; CPS-STACK-LOWERING-CPS-SAME: [[STRUCT_TYPE:%.*]] [[CONT_STATE:%.*]], i32 [[CSPINIT:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META15:![0-9]+]] !lgc.cps [[META16:![0-9]+]] !continuation [[META17:![0-9]+]] { +; CPS-STACK-LOWERING-CPS-NEXT: AllocaSpillBB: +; CPS-STACK-LOWERING-CPS-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; CPS-STACK-LOWERING-CPS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; CPS-STACK-LOWERING-CPS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 +; CPS-STACK-LOWERING-CPS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 8 +; CPS-STACK-LOWERING-CPS-NEXT: store i32 [[TMP2]], ptr [[CSP]], align 4 +; CPS-STACK-LOWERING-CPS-NEXT: [[TMP3:%.*]] = inttoptr i32 [[TMP1]] to ptr addrspace(21) +; CPS-STACK-LOWERING-CPS-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP3]], i32 0 +; CPS-STACK-LOWERING-CPS-NEXT: store i32 [[RETURN_ADDR]], ptr addrspace(21) [[TMP4]], align 4 +; CPS-STACK-LOWERING-CPS-NEXT: [[TMP5:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] @continuations.getSystemData.s_struct.DispatchSystemDatas() +; CPS-STACK-LOWERING-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP5]], 0 +; CPS-STACK-LOWERING-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; CPS-STACK-LOWERING-CPS-NEXT: [[TMP6:%.*]] = add i32 [[TMP1]], 9 +; CPS-STACK-LOWERING-CPS-NEXT: [[TMP7:%.*]] = inttoptr i32 
[[TMP6]] to ptr addrspace(21) +; CPS-STACK-LOWERING-CPS-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP7]], i32 0 +; CPS-STACK-LOWERING-CPS-NEXT: store i32 99, ptr addrspace(21) [[TMP8]], align 4 +; CPS-STACK-LOWERING-CPS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT]], 0 +; CPS-STACK-LOWERING-CPS-NEXT: store i32 undef, ptr addrspace(20) @REGISTERS, align 4 +; CPS-STACK-LOWERING-CPS-NEXT: [[TMP9:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference(ptr @called.resume.0) +; CPS-STACK-LOWERING-CPS-NEXT: call void (...) @lgc.cps.jump(i32 2, i32 2, [[STRUCT_TYPE]] [[CONT_STATE]], i32 [[TMP9]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]]), !continuation.registercount [[META16]] +; CPS-STACK-LOWERING-CPS-NEXT: unreachable +; +; +; CPS-STACK-LOWERING-CPS-LABEL: define void @called.resume.0( +; CPS-STACK-LOWERING-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[CSPINIT:%.*]], i32 [[RETURNADDR:%.*]], [[STRUCT_TYPE:%.*]] [[TMP0:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP1:%.*]]) !lgc.rt.shaderstage [[META15]] !lgc.cps [[META16]] !continuation [[META17]] { +; CPS-STACK-LOWERING-CPS-NEXT: entryresume.0: +; CPS-STACK-LOWERING-CPS-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; CPS-STACK-LOWERING-CPS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; CPS-STACK-LOWERING-CPS-NEXT: [[TMP2:%.*]] = load i32, ptr [[CSP]], align 4 +; CPS-STACK-LOWERING-CPS-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], -8 +; CPS-STACK-LOWERING-CPS-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; CPS-STACK-LOWERING-CPS-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_TYPE]] [[TMP0]], 0 +; CPS-STACK-LOWERING-CPS-NEXT: [[DOTFCA_0_EXTRACT3:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP1]], 0 +; CPS-STACK-LOWERING-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; CPS-STACK-LOWERING-CPS-NEXT: [[TMP6:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(21) +; CPS-STACK-LOWERING-CPS-NEXT: [[TMP7:%.*]] = 
getelementptr i8, ptr addrspace(21) [[TMP6]], i32 0 +; CPS-STACK-LOWERING-CPS-NEXT: [[RETURN_ADDR_RELOAD:%.*]] = load i32, ptr addrspace(21) [[TMP7]], align 4 +; CPS-STACK-LOWERING-CPS-NEXT: store i32 [[TMP4]], ptr addrspace(20) @REGISTERS, align 4 +; CPS-STACK-LOWERING-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT3]], 0 +; CPS-STACK-LOWERING-CPS-NEXT: [[TMP8:%.*]] = load i32, ptr [[CSP]], align 4 +; CPS-STACK-LOWERING-CPS-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], -8 +; CPS-STACK-LOWERING-CPS-NEXT: store i32 [[TMP9]], ptr [[CSP]], align 4 +; CPS-STACK-LOWERING-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR_RELOAD]], i32 2, [[STRUCT_TYPE]] [[TMP0]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META16]] +; CPS-STACK-LOWERING-CPS-NEXT: unreachable +; diff --git a/shared/continuations/test/dx/global-mem-stack.ll b/llvmraytracing/test/dx/global-mem-stack.ll similarity index 56% rename from shared/continuations/test/dx/global-mem-stack.ll rename to llvmraytracing/test/dx/global-mem-stack.ll index dfa5cd9dff..ece34abb60 100644 --- a/shared/continuations/test/dx/global-mem-stack.ll +++ b/llvmraytracing/test/dx/global-mem-stack.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 3 -; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,save-continuation-state,lint,dxil-cont-post-process,lint,remove-types-metadata' -S %s 2> %t.stderr | FileCheck %s +; RUN: opt --verify-each 
-passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' -S %s 2> %t.stderr | FileCheck %s ; RUN: count 0 < %t.stderr -target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" +target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" %struct.DispatchSystemData = type { <3 x i32> } %struct.TraversalData = type { %struct.SystemData, %struct.HitData, <3 x float>, <3 x float>, float, i64 } @@ -146,101 +146,87 @@ define void @MyClosestHitShader(%struct.RayPayload* noalias nocapture %payload, ; CHECK-NEXT: AllocaSpillBB: ; CHECK-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; CHECK-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() +; CHECK-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(22) ; CHECK-NEXT: [[DOTFCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 0, 0 ; CHECK-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 1, 0 -; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; CHECK-NEXT: [[TMP2:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr addrspace(22) -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP1]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP4]], i32 -2 -; CHECK-NEXT: [[TMP6:%.*]] = 
getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S:%.*]], ptr addrspace(22) [[TMP5]], i32 0, i32 0, i32 7 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], -8 +; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], 28 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(22) [[TMP6]], align 4 ; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP7]] to float ; CHECK-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> undef, float [[TMP8]], i32 0 ; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; CHECK-NEXT: [[TMP10:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; CHECK-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr addrspace(22) -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP11]], i32 [[TMP9]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP12]], i32 -2 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP13]], i32 0, i32 0, i64 8 -; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(22) [[TMP14]], align 4 -; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32 [[TMP15]] to float -; CHECK-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP16]], i32 1 -; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; CHECK-NEXT: [[TMP18:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; CHECK-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP18]] to ptr addrspace(22) -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP19]], i32 [[TMP17]] -; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP20]], i32 -2 -; CHECK-NEXT: [[TMP22:%.*]] = getelementptr 
[[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP21]], i32 0, i32 0, i64 9 -; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(22) [[TMP22]], align 4 -; CHECK-NEXT: [[TMP24:%.*]] = bitcast i32 [[TMP23]] to float -; CHECK-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP24]], i32 2 -; CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; CHECK-NEXT: [[TMP26:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; CHECK-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP26]] to ptr addrspace(22) -; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP27]], i32 [[TMP25]] -; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP28]], i32 -2 -; CHECK-NEXT: [[TMP30:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP29]], i32 0, i32 0, i64 10 -; CHECK-NEXT: [[TMP31:%.*]] = load i32, ptr addrspace(22) [[TMP30]], align 4 -; CHECK-NEXT: [[TMP32:%.*]] = bitcast i32 [[TMP31]] to float -; CHECK-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP32]], i32 3 +; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], -8 +; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], 32 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(22) [[TMP12]], align 4 +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32 [[TMP13]] to float +; CHECK-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP14]], i32 1 +; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[TMP15]], -8 +; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[TMP16]], 36 +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP17]] +; 
CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(22) [[TMP18]], align 4 +; CHECK-NEXT: [[TMP20:%.*]] = bitcast i32 [[TMP19]] to float +; CHECK-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP20]], i32 2 +; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; CHECK-NEXT: [[TMP22:%.*]] = add i32 [[TMP21]], -8 +; CHECK-NEXT: [[TMP23:%.*]] = add i32 [[TMP22]], 40 +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP23]] +; CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(22) [[TMP24]], align 4 +; CHECK-NEXT: [[TMP26:%.*]] = bitcast i32 [[TMP25]] to float +; CHECK-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP26]], i32 3 ; CHECK-NEXT: [[VAL_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> [[DOTFCA_1_0_EXTRACT]], 0 ; CHECK-NEXT: [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL_I_FCA_0_INSERT]], 0 ; CHECK-NEXT: [[DOTSROA_06_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 0 -; CHECK-NEXT: [[TMP33:%.*]] = bitcast float [[DOTSROA_06_0_VEC_EXTRACT]] to i32 -; CHECK-NEXT: [[TMP34:%.*]] = bitcast i32 [[TMP33]] to float -; CHECK-NEXT: [[HITATTRS_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP34]], i32 0 +; CHECK-NEXT: [[TMP27:%.*]] = bitcast float [[DOTSROA_06_0_VEC_EXTRACT]] to i32 +; CHECK-NEXT: [[TMP28:%.*]] = bitcast i32 [[TMP27]] to float +; CHECK-NEXT: [[HITATTRS_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP28]], i32 0 ; CHECK-NEXT: [[DOTSROA_06_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 1 -; CHECK-NEXT: [[TMP35:%.*]] = bitcast float [[DOTSROA_06_4_VEC_EXTRACT]] to i32 -; CHECK-NEXT: [[TMP36:%.*]] = bitcast i32 [[TMP35]] to float -; 
CHECK-NEXT: [[HITATTRS_SROA_0_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[HITATTRS_SROA_0_0_VEC_INSERT]], float [[TMP36]], i32 1 +; CHECK-NEXT: [[TMP29:%.*]] = bitcast float [[DOTSROA_06_4_VEC_EXTRACT]] to i32 +; CHECK-NEXT: [[TMP30:%.*]] = bitcast i32 [[TMP29]] to float +; CHECK-NEXT: [[HITATTRS_SROA_0_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[HITATTRS_SROA_0_0_VEC_INSERT]], float [[TMP30]], i32 1 ; CHECK-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; CHECK-NEXT: [[TMP37:%.*]] = extractelement <2 x float> [[HITATTRS_SROA_0_4_VEC_INSERT]], i32 0 -; CHECK-NEXT: [[TMP38:%.*]] = fsub fast float 1.000000e+00, [[TMP37]] -; CHECK-NEXT: [[TMP39:%.*]] = extractelement <2 x float> [[HITATTRS_SROA_0_4_VEC_INSERT]], i32 1 -; CHECK-NEXT: [[TMP40:%.*]] = fsub fast float [[TMP38]], [[TMP39]] -; CHECK-NEXT: [[TMP41:%.*]] = insertelement <4 x float> undef, float [[TMP40]], i64 0 -; CHECK-NEXT: [[TMP42:%.*]] = insertelement <4 x float> [[TMP41]], float [[TMP37]], i64 1 -; CHECK-NEXT: [[TMP43:%.*]] = insertelement <4 x float> [[TMP42]], float [[TMP39]], i64 2 -; CHECK-NEXT: [[TMP44:%.*]] = insertelement <4 x float> [[TMP43]], float 1.000000e+00, i64 3 -; CHECK-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP44]], i32 0 -; CHECK-NEXT: [[TMP45:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 -; CHECK-NEXT: [[TMP46:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; CHECK-NEXT: [[TMP47:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; CHECK-NEXT: [[TMP48:%.*]] = inttoptr i64 [[TMP47]] to ptr addrspace(22) -; CHECK-NEXT: [[TMP49:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP48]], i32 [[TMP46]] -; CHECK-NEXT: [[TMP50:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP49]], i32 -2 -; CHECK-NEXT: [[TMP51:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr addrspace(22) [[TMP50]], i32 0, i32 0, i32 7 -; CHECK-NEXT: store i32 [[TMP45]], ptr addrspace(22) [[TMP51]], align 4 -; 
CHECK-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP44]], i32 1 -; CHECK-NEXT: [[TMP52:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 -; CHECK-NEXT: [[TMP53:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; CHECK-NEXT: [[TMP54:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; CHECK-NEXT: [[TMP55:%.*]] = inttoptr i64 [[TMP54]] to ptr addrspace(22) -; CHECK-NEXT: [[TMP56:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP55]], i32 [[TMP53]] -; CHECK-NEXT: [[TMP57:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP56]], i32 -2 -; CHECK-NEXT: [[TMP58:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP57]], i32 0, i32 0, i64 8 -; CHECK-NEXT: store i32 [[TMP52]], ptr addrspace(22) [[TMP58]], align 4 -; CHECK-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP44]], i32 2 -; CHECK-NEXT: [[TMP59:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 -; CHECK-NEXT: [[TMP60:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; CHECK-NEXT: [[TMP61:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; CHECK-NEXT: [[TMP62:%.*]] = inttoptr i64 [[TMP61]] to ptr addrspace(22) -; CHECK-NEXT: [[TMP63:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP62]], i32 [[TMP60]] -; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP63]], i32 -2 -; CHECK-NEXT: [[TMP65:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP64]], i32 0, i32 0, i64 9 -; CHECK-NEXT: store i32 [[TMP59]], ptr addrspace(22) [[TMP65]], align 4 -; CHECK-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP44]], i32 3 -; CHECK-NEXT: [[TMP66:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 -; CHECK-NEXT: [[TMP67:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; CHECK-NEXT: [[TMP68:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; CHECK-NEXT: [[TMP69:%.*]] 
= inttoptr i64 [[TMP68]] to ptr addrspace(22) -; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP69]], i32 [[TMP67]] -; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP70]], i32 -2 -; CHECK-NEXT: [[TMP72:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP71]], i32 0, i32 0, i64 10 -; CHECK-NEXT: store i32 [[TMP66]], ptr addrspace(22) [[TMP72]], align 4 +; CHECK-NEXT: [[TMP31:%.*]] = extractelement <2 x float> [[HITATTRS_SROA_0_4_VEC_INSERT]], i32 0 +; CHECK-NEXT: [[TMP32:%.*]] = fsub fast float 1.000000e+00, [[TMP31]] +; CHECK-NEXT: [[TMP33:%.*]] = extractelement <2 x float> [[HITATTRS_SROA_0_4_VEC_INSERT]], i32 1 +; CHECK-NEXT: [[TMP34:%.*]] = fsub fast float [[TMP32]], [[TMP33]] +; CHECK-NEXT: [[TMP35:%.*]] = insertelement <4 x float> undef, float [[TMP34]], i64 0 +; CHECK-NEXT: [[TMP36:%.*]] = insertelement <4 x float> [[TMP35]], float [[TMP31]], i64 1 +; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x float> [[TMP36]], float [[TMP33]], i64 2 +; CHECK-NEXT: [[TMP38:%.*]] = insertelement <4 x float> [[TMP37]], float 1.000000e+00, i64 3 +; CHECK-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP38]], i32 0 +; CHECK-NEXT: [[TMP39:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; CHECK-NEXT: [[TMP40:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; CHECK-NEXT: [[TMP41:%.*]] = add i32 [[TMP40]], -8 +; CHECK-NEXT: [[TMP42:%.*]] = add i32 [[TMP41]], 28 +; CHECK-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP42]] +; CHECK-NEXT: store i32 [[TMP39]], ptr addrspace(22) [[TMP43]], align 4 +; CHECK-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP38]], i32 1 +; CHECK-NEXT: [[TMP44:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; CHECK-NEXT: [[TMP45:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; CHECK-NEXT: [[TMP46:%.*]] = add i32 [[TMP45]], -8 +; CHECK-NEXT: 
[[TMP47:%.*]] = add i32 [[TMP46]], 32 +; CHECK-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP47]] +; CHECK-NEXT: store i32 [[TMP44]], ptr addrspace(22) [[TMP48]], align 4 +; CHECK-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP38]], i32 2 +; CHECK-NEXT: [[TMP49:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 +; CHECK-NEXT: [[TMP50:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; CHECK-NEXT: [[TMP51:%.*]] = add i32 [[TMP50]], -8 +; CHECK-NEXT: [[TMP52:%.*]] = add i32 [[TMP51]], 36 +; CHECK-NEXT: [[TMP53:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP52]] +; CHECK-NEXT: store i32 [[TMP49]], ptr addrspace(22) [[TMP53]], align 4 +; CHECK-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP38]], i32 3 +; CHECK-NEXT: [[TMP54:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 +; CHECK-NEXT: [[TMP55:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; CHECK-NEXT: [[TMP56:%.*]] = add i32 [[TMP55]], -8 +; CHECK-NEXT: [[TMP57:%.*]] = add i32 [[TMP56]], 40 +; CHECK-NEXT: [[TMP58:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP57]] +; CHECK-NEXT: store i32 [[TMP54]], ptr addrspace(22) [[TMP58]], align 4 ; CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison, <3 x i32> [[DOTFCA_0_0_EXTRACT]], 0 -; CHECK-NEXT: [[TMP73:%.*]] = load i32, ptr [[CSP]], align 4 -; CHECK-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP73]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META8]] +; CHECK-NEXT: [[TMP59:%.*]] = load i32, ptr [[CSP]], align 4 +; CHECK-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR]], i32 [[TMP59]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META8]] ; CHECK-NEXT: unreachable ; diff --git a/shared/continuations/test/dx/intersection-registercount.ll b/llvmraytracing/test/dx/intersection-registercount.ll similarity index 96% rename from shared/continuations/test/dx/intersection-registercount.ll rename to llvmraytracing/test/dx/intersection-registercount.ll index a29a540f27..9c6db7f9ce 100644 --- a/shared/continuations/test/dx/intersection-registercount.ll +++ b/llvmraytracing/test/dx/intersection-registercount.ll @@ -1,4 +1,4 @@ -; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,save-continuation-state,lint,dxil-cont-post-process,lint,remove-types-metadata' -S %s 2> %t1.stderr | FileCheck %s +; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' -S %s 2> %t1.stderr | FileCheck %s ; RUN: count 0 < %t1.stderr ; Check that the size of @REGISTERS is as big as the continuation.registercount when there is an intersection shader @@ -8,7 +8,7 @@ ; CHECK: define void @Intersection{{.*}}!continuation.registercount ![[MDREGCOUNT:[0-9]+]] ; CHECK: ![[MDREGCOUNT]] = !{i32 25} -target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" +target datalayout = 
"e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" %dx.types.Handle = type { i8* } %struct.BuiltInTriangleIntersectionAttributes = type { <2 x float> } diff --git a/shared/continuations/test/dx/intrinsics/cont-payload-registers-get-i32.ll b/llvmraytracing/test/dx/intrinsics/cont-payload-registers-get-i32.ll similarity index 74% rename from shared/continuations/test/dx/intrinsics/cont-payload-registers-get-i32.ll rename to llvmraytracing/test/dx/intrinsics/cont-payload-registers-get-i32.ll index 9170a0749c..d0ac18dfac 100644 --- a/shared/continuations/test/dx/intrinsics/cont-payload-registers-get-i32.ll +++ b/llvmraytracing/test/dx/intrinsics/cont-payload-registers-get-i32.ll @@ -1,5 +1,5 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function main --version 3 -; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,save-continuation-state,lint,dxil-cont-post-process,lint,remove-types-metadata' -S %s 2> %t.stderr | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function main --version 2 +; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' -S %s 2> %t.stderr | FileCheck %s ; RUN: count 0 < %t.stderr %struct.DispatchSystemData = type { i32 } @@ -12,6 +12,7 @@ declare i32 @_AmdContPayloadRegistersGetI32(i32) declare 
%struct.DispatchSystemData @_cont_SetupRayGen() +declare i32 @_cont_GetContinuationStackAddr() #0 declare !types !9 i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData*) @@ -20,15 +21,19 @@ declare !types !11 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTrian declare !types !12 i32 @_cont_HitKind(%struct.DispatchSystemData*, %struct.HitData*) define void @main() { -; CHECK-LABEL: define void @main( -; CHECK-SAME: ) !continuation [[META11:![0-9]+]] !lgc.rt.shaderstage [[META5:![0-9]+]] !continuation.entry [[META12:![0-9]+]] !continuation.registercount [[META5]] !continuation.state [[META5]] { +; CHECK-LABEL: define void @main +; CHECK-SAME: () !continuation [[META11:![0-9]+]] !lgc.rt.shaderstage [[META5:![0-9]+]] !continuation.entry [[META12:![0-9]+]] !continuation.registercount [[META5]] !continuation.state [[META5]] { ; CHECK-NEXT: entry: +; CHECK-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @_cont_GetContinuationStackAddr() +; CHECK-NEXT: store i32 [[TMP0]], ptr [[CSP]], align 4 ; CHECK-NEXT: [[SYSTEM_DATA:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA:%.*]] @_cont_SetupRayGen() ; CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[SYSTEM_DATA]], 0 ; CHECK-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) -; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds ([30 x i32], ptr addrspace(20) @REGISTERS, i32 0, i32 5), align 4 -; CHECK-NEXT: store i32 [[TMP0]], ptr @debug_global, align 4 -; CHECK-NEXT: call void @continuation.complete() +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds ([30 x i32], ptr addrspace(20) @REGISTERS, i32 0, i32 5), align 4 +; CHECK-NEXT: store i32 [[TMP1]], ptr @debug_global, align 4 +; CHECK-NEXT: ret void +; CHECK: entry.split: ; CHECK-NEXT: unreachable ; entry: diff --git a/llvmraytracing/test/dx/intrinsics/cont-payload-registers-i32-count.ll 
b/llvmraytracing/test/dx/intrinsics/cont-payload-registers-i32-count.ll new file mode 100644 index 0000000000..0362e57888 --- /dev/null +++ b/llvmraytracing/test/dx/intrinsics/cont-payload-registers-i32-count.ll @@ -0,0 +1,51 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' -S %s 2> %t0.stderr | FileCheck -check-prefix=MINCOUNT %s +; RUN: count 0 < %t0.stderr + +%struct.DispatchSystemData = type { i32 } + +@debug_global = external global i32 + +declare i32 @_AmdContPayloadRegistersI32Count() + +declare %struct.DispatchSystemData @_cont_SetupRayGen() +declare i32 @_cont_GetContinuationStackAddr() #0 + +declare !types !9 i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData*) + +define void @main() { +; MINCOUNT-LABEL: define void @main +; MINCOUNT-SAME: () !continuation [[META10:![0-9]+]] !lgc.rt.shaderstage [[META5:![0-9]+]] !continuation.entry [[META11:![0-9]+]] !continuation.registercount [[META5]] !continuation.state [[META5]] { +; MINCOUNT-NEXT: entry: +; MINCOUNT-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; MINCOUNT-NEXT: [[TMP0:%.*]] = call i32 @_cont_GetContinuationStackAddr() +; MINCOUNT-NEXT: store i32 [[TMP0]], ptr [[CSP]], align 4 +; MINCOUNT-NEXT: [[SYSTEM_DATA:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA:%.*]] @_cont_SetupRayGen() +; MINCOUNT-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[SYSTEM_DATA]], 0 +; MINCOUNT-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) +; MINCOUNT-NEXT: store i32 11, ptr @debug_global, align 4 +; MINCOUNT-NEXT: ret void +; MINCOUNT: entry.split: +; MINCOUNT-NEXT: unreachable +; +entry: + %val = call i32 
@_AmdContPayloadRegistersI32Count() + store i32 %val, i32* @debug_global, align 4 + ret void +} + +!dx.entryPoints = !{!0, !3} +!continuation.maxPayloadRegisterCount = !{!7} +!continuation.preservedPayloadRegisterCount = !{!8} + +!0 = !{null, !"", null, !1, !6} +!1 = !{!2, null, null, null} +!2 = !{!3} +!3 = !{void ()* @main, !"main", null, null, !4} +!4 = !{i32 8, i32 7, i32 6, i32 16, i32 7, i32 8, i32 5, !5} +!5 = !{i32 0} +!6 = !{i32 0, i64 65536} +!7 = !{i32 15} +!8 = !{i32 11} +!9 = !{!"function", i32 poison, !10} +!10 = !{i32 0, %struct.DispatchSystemData poison} diff --git a/shared/continuations/test/dx/intrinsics/cont-payload-registers-set-i32.ll b/llvmraytracing/test/dx/intrinsics/cont-payload-registers-set-i32.ll similarity index 77% rename from shared/continuations/test/dx/intrinsics/cont-payload-registers-set-i32.ll rename to llvmraytracing/test/dx/intrinsics/cont-payload-registers-set-i32.ll index b4c5357a97..5de545c731 100644 --- a/shared/continuations/test/dx/intrinsics/cont-payload-registers-set-i32.ll +++ b/llvmraytracing/test/dx/intrinsics/cont-payload-registers-set-i32.ll @@ -1,5 +1,5 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function main --version 3 -; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,save-continuation-state,lint,dxil-cont-post-process,lint,remove-types-metadata' -S %s 2> %t.stderr | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function main --version 2 +; RUN: opt --verify-each 
-passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' -S %s 2> %t.stderr | FileCheck %s ; RUN: count 0 < %t.stderr %struct.DispatchSystemData = type { i32 } @@ -10,6 +10,7 @@ declare void @_AmdContPayloadRegistersSetI32(i32, i32) declare %struct.DispatchSystemData @_cont_SetupRayGen() +declare i32 @_cont_GetContinuationStackAddr() #0 declare !types !9 i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData*) @@ -18,14 +19,18 @@ declare !types !11 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTrian declare !types !12 i32 @_cont_HitKind(%struct.DispatchSystemData*, %struct.HitData*) define void @main() { -; CHECK-LABEL: define void @main( -; CHECK-SAME: ) !continuation [[META11:![0-9]+]] !lgc.rt.shaderstage [[META5:![0-9]+]] !continuation.entry [[META12:![0-9]+]] !continuation.registercount [[META5]] !continuation.state [[META5]] { +; CHECK-LABEL: define void @main +; CHECK-SAME: () !continuation [[META11:![0-9]+]] !lgc.rt.shaderstage [[META5:![0-9]+]] !continuation.entry [[META12:![0-9]+]] !continuation.registercount [[META5]] !continuation.state [[META5]] { ; CHECK-NEXT: entry: +; CHECK-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @_cont_GetContinuationStackAddr() +; CHECK-NEXT: store i32 [[TMP0]], ptr [[CSP]], align 4 ; CHECK-NEXT: [[SYSTEM_DATA:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA:%.*]] @_cont_SetupRayGen() ; CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[SYSTEM_DATA]], 0 ; CHECK-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; CHECK-NEXT: store i32 42, ptr addrspace(20) getelementptr inbounds ([30 x i32], ptr addrspace(20) @REGISTERS, i32 0, i32 5), align 4 -; CHECK-NEXT: call void @continuation.complete() +; CHECK-NEXT: ret void +; 
CHECK: entry.split: ; CHECK-NEXT: unreachable ; entry: diff --git a/llvmraytracing/test/dx/intrinsics/cont-stack-access.ll b/llvmraytracing/test/dx/intrinsics/cont-stack-access.ll new file mode 100644 index 0000000000..9d0dc51dc7 --- /dev/null +++ b/llvmraytracing/test/dx/intrinsics/cont-stack-access.ll @@ -0,0 +1,97 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: grep -v SKIP_GLOBAL_ADDRSPACE %s | opt --verify-each -passes='dxil-cont-post-process,lint,remove-types-metadata' -S 2> %t0.stderr | FileCheck %s -check-prefix=STACK_SCRATCH +; RUN: count 0 < %t0.stderr +; RUN: grep -v SKIP_SCRATCH_ADDRSPACE %s | opt --verify-each -passes='dxil-cont-post-process,lint,remove-types-metadata' -S 2> %t1.stderr | FileCheck %s -check-prefix=STACK_GLOBAL +; RUN: count 0 < %t1.stderr + +declare i32 @_AmdContStackAlloc(i32 %size) +declare i32 @_AmdContStackLoadI32(i32 %addr) +declare i32 @_AmdContStackLoadLastUseI32(i32 %addr) +declare i32 @_AmdContStackStoreI32(i32 %addr, i32 %val) +declare i32 @_AmdContStackFree(i32 %size) +declare i32 @_cont_GetContinuationStackAddr() #0 +declare i64 @_cont_GetContinuationStackGlobalMemBase() ; SKIP_GLOBAL_ADDRSPACE + +%struct.DispatchSystemData = type { i32 } +%struct.type = type { <2 x float> } + +%struct.Payload = type { [8 x i32] } + +@debug_global = external global i32 + +define void @main(%struct.type %cont.state, i32 %return.addr, i32 %shader.index, %struct.DispatchSystemData %system.data) !lgc.rt.shaderstage !14 !lgc.cps !15 { +; STACK_SCRATCH-LABEL: define void @main( +; STACK_SCRATCH-SAME: [[STRUCT_TYPE:%.*]] [[CONT_STATE:%.*]], i32 [[CSPINIT:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[SYSTEM_DATA:%.*]]) !lgc.rt.shaderstage [[META5:![0-9]+]] !lgc.cps [[META8:![0-9]+]] !continuation.stacksize [[META9:![0-9]+]] { +; STACK_SCRATCH-NEXT: entry: +; STACK_SCRATCH-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; STACK_SCRATCH-NEXT: store 
i32 [[CSPINIT]], ptr [[CSP]], align 4 +; STACK_SCRATCH-NEXT: [[TMP0:%.*]] = load i32, ptr [[CSP]], align 4 +; STACK_SCRATCH-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 12 +; STACK_SCRATCH-NEXT: store i32 [[TMP1]], ptr [[CSP]], align 4 +; STACK_SCRATCH-NEXT: [[PTR_FINAL:%.*]] = add i32 [[TMP0]], 4 +; STACK_SCRATCH-NEXT: [[TMP2:%.*]] = inttoptr i32 [[PTR_FINAL]] to ptr addrspace(21) +; STACK_SCRATCH-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP2]], i32 0 +; STACK_SCRATCH-NEXT: store i32 10, ptr addrspace(21) [[TMP3]], align 4 +; STACK_SCRATCH-NEXT: [[TMP4:%.*]] = inttoptr i32 [[PTR_FINAL]] to ptr addrspace(21) +; STACK_SCRATCH-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP4]], i32 0 +; STACK_SCRATCH-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(21) [[TMP5]], align 4 +; STACK_SCRATCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[CSP]], align 4 +; STACK_SCRATCH-NEXT: [[TMP8:%.*]] = add i32 [[TMP7]], -12 +; STACK_SCRATCH-NEXT: store i32 [[TMP8]], ptr [[CSP]], align 4 +; STACK_SCRATCH-NEXT: store i32 [[TMP6]], ptr @debug_global, align 4 +; STACK_SCRATCH-NEXT: ret void +; +; STACK_GLOBAL-LABEL: define void @main( +; STACK_GLOBAL-SAME: [[STRUCT_TYPE:%.*]] [[CONT_STATE:%.*]], i32 [[CSPINIT:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[SYSTEM_DATA:%.*]]) !lgc.rt.shaderstage [[META5:![0-9]+]] !lgc.cps [[META8:![0-9]+]] !continuation.stacksize [[META9:![0-9]+]] { +; STACK_GLOBAL-NEXT: entry: +; STACK_GLOBAL-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; STACK_GLOBAL-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; STACK_GLOBAL-NEXT: [[TMP0:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() +; STACK_GLOBAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(22) +; STACK_GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr [[CSP]], align 4 +; STACK_GLOBAL-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 12 +; STACK_GLOBAL-NEXT: store i32 [[TMP3]], ptr [[CSP]], align 4 +; STACK_GLOBAL-NEXT: [[PTR_FINAL:%.*]] = add i32 
[[TMP2]], 4 +; STACK_GLOBAL-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP1]], i32 [[PTR_FINAL]] +; STACK_GLOBAL-NEXT: store i32 10, ptr addrspace(22) [[TMP4]], align 4 +; STACK_GLOBAL-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP1]], i32 [[PTR_FINAL]] +; STACK_GLOBAL-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(22) [[TMP5]], align 4 +; STACK_GLOBAL-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP1]], i32 [[PTR_FINAL]] +; STACK_GLOBAL-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(22) [[TMP7]], align 4, !amdgpu.last.use [[META10:![0-9]+]] +; STACK_GLOBAL-NEXT: [[TMP9:%.*]] = load i32, ptr [[CSP]], align 4 +; STACK_GLOBAL-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], -12 +; STACK_GLOBAL-NEXT: store i32 [[TMP10]], ptr [[CSP]], align 4 +; STACK_GLOBAL-NEXT: store i32 [[TMP6]], ptr @debug_global, align 4 +; STACK_GLOBAL-NEXT: ret void +; +entry: + %ptr = call i32 @_AmdContStackAlloc(i32 12) + %ptr.final = add i32 %ptr, 4 + call void @_AmdContStackStoreI32(i32 %ptr.final, i32 10) + %val = call i32 @_AmdContStackLoadI32(i32 %ptr.final) + %val.2 = call i32 @_AmdContStackLoadLastUseI32(i32 %ptr.final) ; SKIP_GLOBAL_ADDRSPACE + call void @_AmdContStackFree(i32 12) + store i32 %val, ptr @debug_global + ret void +} + +!dx.entryPoints = !{!1, !5} +!continuation.stackAddrspace = !{!16} ; SKIP_GLOBAL_ADDRSPACE +!continuation.stackAddrspace = !{!17} ; SKIP_SCRATCH_ADDRSPACE + +!1 = !{null, !"", null, !3, !2} +!2 = !{i32 0, i64 65536} +!3 = !{!4, null, null, null} +!4 = !{!5} +!5 = !{void ()* @main, !"main", null, null, !6} +!6 = !{i32 8, i32 7, i32 6, i32 16, i32 7, i32 8, i32 5, !7} +!7 = !{i32 0} +!9 = !{i32 8, i32 10, i32 6, i32 16, i32 7, i32 8, i32 5, !7} +!10 = !{!"function", !"void", !11, !11} +!11 = !{i32 0, %struct.Payload poison} +!12 = !{!"function", i32 poison, !13, !14} +!13 = !{i32 0, %struct.DispatchSystemData poison} +!14 = !{i32 0} +!15 =!{i32 1} +!16 = !{i32 22} +!17 = !{i32 21} diff --git 
a/shared/continuations/test/dx/intrinsics/cont-stack-alloc.ll b/llvmraytracing/test/dx/intrinsics/cont-stack-alloc.ll similarity index 69% rename from shared/continuations/test/dx/intrinsics/cont-stack-alloc.ll rename to llvmraytracing/test/dx/intrinsics/cont-stack-alloc.ll index 0cee6f236f..1a883d0619 100644 --- a/shared/continuations/test/dx/intrinsics/cont-stack-alloc.ll +++ b/llvmraytracing/test/dx/intrinsics/cont-stack-alloc.ll @@ -1,9 +1,10 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function main --version 3 -; RUN: opt --verify-each -passes='cgscc(inline),lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,save-continuation-state,lint,dxil-cont-post-process,lint,remove-types-metadata' -S %s 2> %t.stderr | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function main --version 2 +; RUN: opt --verify-each -passes='cgscc(inline),lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' -S %s 2> %t.stderr | FileCheck %s ; RUN: count 0 < %t.stderr -declare i32 @_AmdContStackAlloc(ptr %csp, i32 %size) +declare i32 @_AmdContStackAlloc(i32 %size) declare i32 @_AmdContPayloadRegistersI32Count() +declare i32 @_cont_GetContinuationStackAddr() #0 %struct.DispatchSystemData = type { i32 } %struct.HitData = type { float, i32 } @@ -18,23 +19,27 @@ declare !types !12 i32 @_cont_HitKind(%struct.DispatchSystemData*, %struct.HitDa @debug_global = external global i32 define void @main() !lgc.rt.shaderstage !17 { -; CHECK-LABEL: define void @main( -; CHECK-SAME: ) !lgc.rt.shaderstage [[META5:![0-9]+]] !continuation.entry [[META11:![0-9]+]] !continuation.registercount [[META5]] !continuation [[META12:![0-9]+]] 
!continuation.state [[META5]] !continuation.stacksize [[META13:![0-9]+]] { +; CHECK-LABEL: define void @main +; CHECK-SAME: () !lgc.rt.shaderstage [[META5:![0-9]+]] !continuation.entry [[META11:![0-9]+]] !continuation.registercount [[META5]] !continuation [[META12:![0-9]+]] !continuation.stacksize [[META13:![0-9]+]] !continuation.state [[META5]] { ; CHECK-NEXT: entry: +; CHECK-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @_cont_GetContinuationStackAddr() +; CHECK-NEXT: store i32 [[TMP0]], ptr [[CSP]], align 4 ; CHECK-NEXT: [[SYSTEM_DATA:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA:%.*]] @_cont_SetupRayGen() ; CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[SYSTEM_DATA]], 0 ; CHECK-NEXT: [[PL_BYTES:%.*]] = mul i32 30, 4 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @debug_global, align 4 -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 120 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 120 +; CHECK-NEXT: store i32 [[TMP2]], ptr [[CSP]], align 4 ; CHECK-NEXT: store i32 [[TMP1]], ptr @debug_global, align 4 -; CHECK-NEXT: store i32 [[TMP0]], ptr @debug_global, align 4 -; CHECK-NEXT: call void @continuation.complete() +; CHECK-NEXT: ret void +; CHECK: entry.split: ; CHECK-NEXT: unreachable ; entry: %pl_size = call i32 @_AmdContPayloadRegistersI32Count() %pl_bytes = mul i32 %pl_size, 4 - %val = call i32 @_AmdContStackAlloc(ptr @debug_global, i32 %pl_bytes) + %val = call i32 @_AmdContStackAlloc(i32 %pl_bytes) store i32 %val, ptr @debug_global ret void } @@ -48,6 +53,7 @@ define void @chit(%struct.Payload* %pl, %struct.Payload* %attrs) !types !10 !lgc } !dx.entryPoints = !{!1, !5, !8} +!continuation.maxUsedPayloadRegisterCount = !{!19} !1 = !{null, !"", null, !3, !2} !2 = !{i32 0, i64 65536} @@ -67,3 +73,4 @@ define void @chit(%struct.Payload* %pl, %struct.Payload* %attrs) !types !10 !lgc !16 = !{!"function", 
%struct.BuiltInTriangleIntersectionAttributes poison, !13} !17 = !{i32 0} !18 = !{i32 3} +!19 = !{i32 30} diff --git a/shared/continuations/test/dx/intrinsics/continuation-stack-is-global-false.ll b/llvmraytracing/test/dx/intrinsics/continuation-stack-is-global-false.ll similarity index 100% rename from shared/continuations/test/dx/intrinsics/continuation-stack-is-global-false.ll rename to llvmraytracing/test/dx/intrinsics/continuation-stack-is-global-false.ll diff --git a/shared/continuations/test/dx/intrinsics/continuation-stack-is-global-true.ll b/llvmraytracing/test/dx/intrinsics/continuation-stack-is-global-true.ll similarity index 100% rename from shared/continuations/test/dx/intrinsics/continuation-stack-is-global-true.ll rename to llvmraytracing/test/dx/intrinsics/continuation-stack-is-global-true.ll diff --git a/shared/continuations/test/dx/intrinsics/get-current-func-addr.ll b/llvmraytracing/test/dx/intrinsics/get-current-func-addr.ll similarity index 100% rename from shared/continuations/test/dx/intrinsics/get-current-func-addr.ll rename to llvmraytracing/test/dx/intrinsics/get-current-func-addr.ll diff --git a/shared/continuations/test/dx/intrinsics/get-flags.ll b/llvmraytracing/test/dx/intrinsics/get-flags.ll similarity index 100% rename from shared/continuations/test/dx/intrinsics/get-flags.ll rename to llvmraytracing/test/dx/intrinsics/get-flags.ll diff --git a/shared/continuations/test/dx/intrinsics/get-rtip.ll b/llvmraytracing/test/dx/intrinsics/get-rtip.ll similarity index 100% rename from shared/continuations/test/dx/intrinsics/get-rtip.ll rename to llvmraytracing/test/dx/intrinsics/get-rtip.ll diff --git a/shared/continuations/test/dx/intrinsics/get-shader-kind.ll b/llvmraytracing/test/dx/intrinsics/get-shader-kind.ll similarity index 95% rename from shared/continuations/test/dx/intrinsics/get-shader-kind.ll rename to llvmraytracing/test/dx/intrinsics/get-shader-kind.ll index c8d2d803b0..f974ca77a1 100644 --- 
a/shared/continuations/test/dx/intrinsics/get-shader-kind.ll +++ b/llvmraytracing/test/dx/intrinsics/get-shader-kind.ll @@ -29,7 +29,7 @@ define float @_cont_RayTCurrent() { ; Note: DXILShaderKind::Miss has value 11 define void @MyMiss(%struct.Payload* %payload) !types !1 !lgc.rt.shaderstage !16 { ; CHECK-LABEL: define %struct.DispatchSystemData @MyMiss -; CHECK-SAME: ([[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !types [[META12:![0-9]+]] !lgc.rt.shaderstage [[META14:![0-9]+]] !continuation.registercount [[META15:![0-9]+]] !continuation [[META16:![0-9]+]] { +; CHECK-SAME: ([[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META12:![0-9]+]] !continuation.registercount [[META13:![0-9]+]] !continuation [[META14:![0-9]+]] { ; CHECK-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_PAYLOAD:%.*]], align 8 ; CHECK-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 @@ -46,7 +46,7 @@ define void @MyMiss(%struct.Payload* %payload) !types !1 !lgc.rt.shaderstage !16 ; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 ; CHECK-NEXT: store i32 [[TMP8]], ptr @PAYLOAD, align 4 ; CHECK-NEXT: [[TMP9:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; CHECK-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP9]], !continuation.registercount [[META15]] +; CHECK-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP9]], !continuation.registercount [[META13]] ; %1 = call i32 @_AmdGetShaderKind() %2 = getelementptr inbounds %struct.Payload, %struct.Payload* %payload, i32 0, i32 0 diff --git a/shared/continuations/test/dx/intrinsics/shader-index.ll b/llvmraytracing/test/dx/intrinsics/shader-index.ll similarity index 94% rename from shared/continuations/test/dx/intrinsics/shader-index.ll rename to llvmraytracing/test/dx/intrinsics/shader-index.ll index 526dc160ac..e80987b244 100644 --- 
a/shared/continuations/test/dx/intrinsics/shader-index.ll +++ b/llvmraytracing/test/dx/intrinsics/shader-index.ll @@ -19,7 +19,7 @@ define i1 @_cont_ReportHit(%struct.DispatchSystemData* %data, float %t, i32 %hit define void @main() !lgc.rt.shaderstage !24 { ; CHECK-LABEL: define void @main( -; CHECK-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META13:![0-9]+]] !lgc.cps [[META13]] !continuation [[META14:![0-9]+]] { +; CHECK-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META11:![0-9]+]] !lgc.cps [[META11]] !continuation [[META14:![0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 ; CHECK-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 @@ -35,7 +35,7 @@ entry: define void @callable(%struct.Payload* %payload) !types !22 !lgc.rt.shaderstage !25 { ; CHECK-LABEL: define void @callable( -; CHECK-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !types [[META15:![0-9]+]] !lgc.rt.shaderstage [[META17:![0-9]+]] !lgc.cps [[META18:![0-9]+]] !continuation [[META19:![0-9]+]] { +; CHECK-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META15:![0-9]+]] !lgc.cps [[META16:![0-9]+]] !continuation [[META17:![0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 ; CHECK-NEXT: [[TMP1:%.*]] = alloca [[STRUCT_PAYLOAD:%.*]], align 8 diff --git a/shared/continuations/test/dx/intrinsics/value-i32.ll b/llvmraytracing/test/dx/intrinsics/value-i32.ll similarity index 97% rename from shared/continuations/test/dx/intrinsics/value-i32.ll rename 
to llvmraytracing/test/dx/intrinsics/value-i32.ll index c5686d6fe0..5bc45184a7 100644 --- a/shared/continuations/test/dx/intrinsics/value-i32.ll +++ b/llvmraytracing/test/dx/intrinsics/value-i32.ll @@ -41,8 +41,11 @@ define void @set(%struct.Payload* %pl, i32 %val) !types !4 { ret void } +!continuation.stackAddrspace = !{!5} + !0 = !{!"function", i32 poison, !1} !1 = !{i32 0, %struct.Payload poison} !2 = !{!"function", i32 poison, !1, i32 poison} !3 = !{!"function", !"void", !1, i32 poison, i32 poison} !4 = !{!"function", !"void", !1, i32 poison} +!5 = !{i32 21} diff --git a/shared/continuations/test/dx/lower-await.ll b/llvmraytracing/test/dx/lower-await.ll similarity index 58% rename from shared/continuations/test/dx/lower-await.ll rename to llvmraytracing/test/dx/lower-await.ll index 1ac4fe9abd..0eb1a4d0d1 100644 --- a/shared/continuations/test/dx/lower-await.ll +++ b/llvmraytracing/test/dx/lower-await.ll @@ -6,7 +6,7 @@ ; RUN: opt --verify-each -passes='lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint' -S %s 2> %t2.stderr | FileCheck -check-prefix=CLEANED %s ; RUN: count 0 < %t2.stderr -target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" +target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" %continuation.token = type { } @@ -18,7 +18,7 @@ declare %continuation.token* @async_fun_with_arg(i32) define void @simple_await() !continuation.registercount !1 { ; AWAIT-LABEL: define { ptr, ptr } @simple_await( -; AWAIT-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], ptr [[TMP0:%.*]]) !continuation.registercount [[META1:![0-9]+]] !continuation 
[[META2:![0-9]+]] { +; AWAIT-SAME: i64 [[RETURNADDR:%.*]], ptr [[TMP0:%.*]]) !continuation.registercount [[META1:![0-9]+]] !continuation [[META2:![0-9]+]] { ; AWAIT-NEXT: [[TMP2:%.*]] = call token @llvm.coro.id.retcon(i32 8, i32 4, ptr [[TMP0]], ptr @continuation.prototype.simple_await, ptr @continuation.malloc, ptr @continuation.free) ; AWAIT-NEXT: [[TMP3:%.*]] = call ptr @llvm.coro.begin(token [[TMP2]], ptr null) ; AWAIT-NEXT: [[TOK:%.*]] = call ptr @async_fun(), !continuation.registercount [[META1]], !continuation.returnedRegistercount !1 @@ -27,31 +27,23 @@ define void @simple_await() !continuation.registercount !1 { ; AWAIT-NEXT: unreachable ; ; CORO-LABEL: define { ptr, ptr } @simple_await( -; CORO-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], ptr [[TMP0:%.*]]) !continuation.registercount [[META1:![0-9]+]] !continuation [[META2:![0-9]+]] { +; CORO-SAME: i64 [[RETURNADDR:%.*]], ptr [[TMP0:%.*]]) !continuation.registercount [[META1:![0-9]+]] !continuation [[META2:![0-9]+]] { ; CORO-NEXT: AllocaSpillBB: ; CORO-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME:%.*]], ptr [[TMP0]], i32 0, i32 0 ; CORO-NEXT: store i64 [[RETURNADDR]], ptr [[RETURNADDR_SPILL_ADDR]], align 4 ; CORO-NEXT: [[TOK:%.*]] = call ptr @async_fun(), !continuation.registercount [[META1]], !continuation.returnedRegistercount !1 -; CORO-NEXT: [[TMP1:%.*]] = insertvalue { ptr, ptr } [[UNDEF_OR_POISON:undef|poison]], ptr @simple_await.resume.0, 0 +; CORO-NEXT: [[TMP1:%.*]] = insertvalue { ptr, ptr } {{undef|poison}}, ptr @simple_await.resume.0, 0 ; CORO-NEXT: [[TMP2:%.*]] = insertvalue { ptr, ptr } [[TMP1]], ptr [[TOK]], 1 ; CORO-NEXT: ret { ptr, ptr } [[TMP2]] ; ; CLEANED-LABEL: define void @simple_await( -; CLEANED-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]]) !continuation.registercount [[META1:![0-9]+]] !continuation [[META2:![0-9]+]] !continuation.state [[META3:![0-9]+]] !continuation.stacksize [[META3]] { +; CLEANED-SAME: i64 [[RETURNADDR:%.*]]) 
!continuation.registercount [[META1:![0-9]+]] !continuation [[META2:![0-9]+]] !continuation.stacksize [[META3:![0-9]+]] !continuation.state [[META3]] { ; CLEANED-NEXT: AllocaSpillBB: -; CLEANED-NEXT: [[TMP0:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CLEANED-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -; CLEANED-NEXT: [[TMP2:%.*]] = inttoptr i32 [[TMP1]] to ptr addrspace(21) -; CLEANED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP2]], i64 0 -; CLEANED-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME:%.*]], ptr addrspace(21) [[TMP3]], i32 0, i32 0 -; CLEANED-NEXT: store i64 [[RETURNADDR]], ptr addrspace(21) [[RETURNADDR_SPILL_ADDR]], align 4 -; CLEANED-NEXT: [[TMP4:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CLEANED-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -; CLEANED-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], 8 -; CLEANED-NEXT: store i32 [[TMP6]], ptr [[TMP4]], align 4 -; CLEANED-NEXT: [[TMP7:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CLEANED-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -; CLEANED-NEXT: call void (i64, ...) @continuation.continue(i64 ptrtoint (ptr @async_fun to i64), i32 [[TMP8]], i64 ptrtoint (ptr @simple_await.resume.0 to i64)), !continuation.registercount [[META1]], !continuation.returnedRegistercount !1 +; CLEANED-NEXT: [[TMP0:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) +; CLEANED-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME:%.*]], ptr addrspace(32) [[TMP0]], i32 0, i32 0 +; CLEANED-NEXT: store i64 [[RETURNADDR]], ptr addrspace(32) [[RETURNADDR_SPILL_ADDR]], align 4 +; CLEANED-NEXT: [[CONT_STATE_STACK_ALLOC:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) +; CLEANED-NEXT: call void (i64, ...) 
@continuation.continue(i64 ptrtoint (ptr @async_fun to i64), i64 ptrtoint (ptr @simple_await.resume.0 to i64)), !continuation.registercount [[META1]], !continuation.returnedRegistercount !1 ; CLEANED-NEXT: unreachable ; %tok = call %continuation.token* @async_fun(), !continuation.registercount !1, !continuation.returnedRegistercount !1 @@ -66,23 +58,21 @@ define void @simple_await_entry() !continuation.entry !0 !continuation.registerc ; AWAIT-NEXT: [[TMP3:%.*]] = call ptr @llvm.coro.begin(token [[TMP2]], ptr null) ; AWAIT-NEXT: [[TOK:%.*]] = call ptr @async_fun(), !continuation.registercount [[META1]], !continuation.returnedRegistercount !1 ; AWAIT-NEXT: [[TMP4:%.*]] = call i1 (...) @llvm.coro.suspend.retcon.i1(ptr [[TOK]]) -; AWAIT-NEXT: call void (...) @continuation.return(i64 [[UNDEF_OR_POISON:undef|poison]]) +; AWAIT-NEXT: call void (...) @continuation.return(i64 undef) ; AWAIT-NEXT: unreachable ; ; CORO-LABEL: define { ptr, ptr } @simple_await_entry( ; CORO-SAME: ptr [[TMP0:%.*]]) !continuation.registercount [[META1]] !continuation.entry [[META3:![0-9]+]] !continuation [[META4:![0-9]+]] { ; CORO-NEXT: AllocaSpillBB: ; CORO-NEXT: [[TOK:%.*]] = call ptr @async_fun(), !continuation.registercount [[META1]], !continuation.returnedRegistercount !1 -; CORO-NEXT: [[TMP1:%.*]] = insertvalue { ptr, ptr } [[UNDEF_OR_POISON]], ptr @simple_await_entry.resume.0, 0 +; CORO-NEXT: [[TMP1:%.*]] = insertvalue { ptr, ptr } {{undef|poison}}, ptr @simple_await_entry.resume.0, 0 ; CORO-NEXT: [[TMP2:%.*]] = insertvalue { ptr, ptr } [[TMP1]], ptr [[TOK]], 1 ; CORO-NEXT: ret { ptr, ptr } [[TMP2]] ; ; CLEANED-LABEL: define void @simple_await_entry( ; CLEANED-SAME: ) !continuation.registercount [[META1]] !continuation.entry [[META4:![0-9]+]] !continuation [[META5:![0-9]+]] !continuation.state [[META1]] { ; CLEANED-NEXT: AllocaSpillBB: -; CLEANED-NEXT: [[TMP0:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CLEANED-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -; 
CLEANED-NEXT: call void (i64, ...) @continuation.continue(i64 ptrtoint (ptr @async_fun to i64), i32 [[TMP1]], i64 ptrtoint (ptr @simple_await_entry.resume.0 to i64)), !continuation.registercount [[META1]], !continuation.returnedRegistercount !1 +; CLEANED-NEXT: call void (i64, ...) @continuation.continue(i64 ptrtoint (ptr @async_fun to i64), i64 ptrtoint (ptr @simple_await_entry.resume.0 to i64)), !continuation.registercount [[META1]], !continuation.returnedRegistercount !1 ; CLEANED-NEXT: unreachable ; %tok = call %continuation.token* @async_fun(), !continuation.registercount !1, !continuation.returnedRegistercount !1 @@ -93,7 +83,7 @@ define void @simple_await_entry() !continuation.entry !0 !continuation.registerc define void @await_with_arg(i32 %i) !continuation.registercount !1 { ; AWAIT-LABEL: define { ptr, ptr } @await_with_arg( -; AWAIT-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], i32 [[I:%.*]], ptr [[TMP0:%.*]]) !continuation.registercount [[META1]] !continuation [[META5:![0-9]+]] { +; AWAIT-SAME: i64 [[RETURNADDR:%.*]], i32 [[I:%.*]], ptr [[TMP0:%.*]]) !continuation.registercount [[META1]] !continuation [[META5:![0-9]+]] { ; AWAIT-NEXT: [[TMP2:%.*]] = call token @llvm.coro.id.retcon(i32 8, i32 4, ptr [[TMP0]], ptr @continuation.prototype.await_with_arg, ptr @continuation.malloc, ptr @continuation.free) ; AWAIT-NEXT: [[TMP3:%.*]] = call ptr @llvm.coro.begin(token [[TMP2]], ptr null) ; AWAIT-NEXT: [[TOK:%.*]] = call ptr @async_fun_with_arg(i32 [[I]]), !continuation.registercount [[META1]], !continuation.returnedRegistercount !1 @@ -102,31 +92,23 @@ define void @await_with_arg(i32 %i) !continuation.registercount !1 { ; AWAIT-NEXT: unreachable ; ; CORO-LABEL: define { ptr, ptr } @await_with_arg( -; CORO-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], i32 [[I:%.*]], ptr [[TMP0:%.*]]) !continuation.registercount [[META1]] !continuation [[META5:![0-9]+]] { +; CORO-SAME: i64 [[RETURNADDR:%.*]], i32 [[I:%.*]], ptr [[TMP0:%.*]]) !continuation.registercount 
[[META1]] !continuation [[META5:![0-9]+]] { ; CORO-NEXT: AllocaSpillBB: ; CORO-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[AWAIT_WITH_ARG_FRAME:%.*]], ptr [[TMP0]], i32 0, i32 0 ; CORO-NEXT: store i64 [[RETURNADDR]], ptr [[RETURNADDR_SPILL_ADDR]], align 4 ; CORO-NEXT: [[TOK:%.*]] = call ptr @async_fun_with_arg(i32 [[I]]), !continuation.registercount [[META1]], !continuation.returnedRegistercount !1 -; CORO-NEXT: [[TMP1:%.*]] = insertvalue { ptr, ptr } [[UNDEF_OR_POISON]], ptr @await_with_arg.resume.0, 0 +; CORO-NEXT: [[TMP1:%.*]] = insertvalue { ptr, ptr } {{undef|poison}}, ptr @await_with_arg.resume.0, 0 ; CORO-NEXT: [[TMP2:%.*]] = insertvalue { ptr, ptr } [[TMP1]], ptr [[TOK]], 1 ; CORO-NEXT: ret { ptr, ptr } [[TMP2]] ; ; CLEANED-LABEL: define void @await_with_arg( -; CLEANED-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], i32 [[I:%.*]]) !continuation.registercount [[META1]] !continuation [[META6:![0-9]+]] !continuation.state [[META3]] !continuation.stacksize [[META3]] { +; CLEANED-SAME: i64 [[RETURNADDR:%.*]], i32 [[I:%.*]]) !continuation.registercount [[META1]] !continuation [[META6:![0-9]+]] !continuation.stacksize [[META3]] !continuation.state [[META3]] { ; CLEANED-NEXT: AllocaSpillBB: -; CLEANED-NEXT: [[TMP0:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CLEANED-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -; CLEANED-NEXT: [[TMP2:%.*]] = inttoptr i32 [[TMP1]] to ptr addrspace(21) -; CLEANED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP2]], i64 0 -; CLEANED-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[AWAIT_WITH_ARG_FRAME:%.*]], ptr addrspace(21) [[TMP3]], i32 0, i32 0 -; CLEANED-NEXT: store i64 [[RETURNADDR]], ptr addrspace(21) [[RETURNADDR_SPILL_ADDR]], align 4 -; CLEANED-NEXT: [[TMP4:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CLEANED-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -; CLEANED-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], 8 -; CLEANED-NEXT: 
store i32 [[TMP6]], ptr [[TMP4]], align 4 -; CLEANED-NEXT: [[TMP7:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CLEANED-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -; CLEANED-NEXT: call void (i64, ...) @continuation.continue(i64 ptrtoint (ptr @async_fun_with_arg to i64), i32 [[TMP8]], i64 ptrtoint (ptr @await_with_arg.resume.0 to i64), i32 [[I]]), !continuation.registercount [[META1]], !continuation.returnedRegistercount !1 +; CLEANED-NEXT: [[TMP0:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) +; CLEANED-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[AWAIT_WITH_ARG_FRAME:%.*]], ptr addrspace(32) [[TMP0]], i32 0, i32 0 +; CLEANED-NEXT: store i64 [[RETURNADDR]], ptr addrspace(32) [[RETURNADDR_SPILL_ADDR]], align 4 +; CLEANED-NEXT: [[CONT_STATE_STACK_ALLOC:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) +; CLEANED-NEXT: call void (i64, ...) @continuation.continue(i64 ptrtoint (ptr @async_fun_with_arg to i64), i64 ptrtoint (ptr @await_with_arg.resume.0 to i64), i32 [[I]]), !continuation.registercount [[META1]], !continuation.returnedRegistercount !1 ; CLEANED-NEXT: unreachable ; %tok = call %continuation.token* @async_fun_with_arg(i32 %i), !continuation.registercount !1, !continuation.returnedRegistercount !1 @@ -136,41 +118,33 @@ define void @await_with_arg(i32 %i) !continuation.registercount !1 { define i32 @await_with_ret_value() !continuation.registercount !1 { ; AWAIT-LABEL: define { ptr, ptr } @await_with_ret_value( -; AWAIT-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], ptr [[TMP0:%.*]]) !continuation.registercount [[META1]] !continuation [[META6:![0-9]+]] { +; AWAIT-SAME: i64 [[RETURNADDR:%.*]], ptr [[TMP0:%.*]]) !continuation.registercount [[META1]] !continuation [[META6:![0-9]+]] { ; AWAIT-NEXT: [[TMP2:%.*]] = call token @llvm.coro.id.retcon(i32 8, i32 4, ptr [[TMP0]], ptr @continuation.prototype.await_with_ret_value, ptr @continuation.malloc, ptr @continuation.free) ; AWAIT-NEXT: [[TMP3:%.*]] = call ptr 
@llvm.coro.begin(token [[TMP2]], ptr null) ; AWAIT-NEXT: [[TOK:%.*]] = call ptr @async_fun(), !continuation.registercount [[META1]], !continuation.returnedRegistercount !1 ; AWAIT-NEXT: [[TMP4:%.*]] = call i1 (...) @llvm.coro.suspend.retcon.i1(ptr [[TOK]]) -; AWAIT-NEXT: [[TMP5:%.*]] = call i32 @continuations.getReturnValue.i32() +; AWAIT-NEXT: [[TMP5:%.*]] = call i32 @continuations.getReturnValue__i32() ; AWAIT-NEXT: call void (...) @continuation.return(i64 [[RETURNADDR]], i32 [[TMP5]]), !continuation.registercount [[META1]] ; AWAIT-NEXT: unreachable ; ; CORO-LABEL: define { ptr, ptr } @await_with_ret_value( -; CORO-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], ptr [[TMP0:%.*]]) !continuation.registercount [[META1]] !continuation [[META6:![0-9]+]] { +; CORO-SAME: i64 [[RETURNADDR:%.*]], ptr [[TMP0:%.*]]) !continuation.registercount [[META1]] !continuation [[META6:![0-9]+]] { ; CORO-NEXT: AllocaSpillBB: ; CORO-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[AWAIT_WITH_RET_VALUE_FRAME:%.*]], ptr [[TMP0]], i32 0, i32 0 ; CORO-NEXT: store i64 [[RETURNADDR]], ptr [[RETURNADDR_SPILL_ADDR]], align 4 ; CORO-NEXT: [[TOK:%.*]] = call ptr @async_fun(), !continuation.registercount [[META1]], !continuation.returnedRegistercount !1 -; CORO-NEXT: [[TMP1:%.*]] = insertvalue { ptr, ptr } [[UNDEF_OR_POISON]], ptr @await_with_ret_value.resume.0, 0 +; CORO-NEXT: [[TMP1:%.*]] = insertvalue { ptr, ptr } {{undef|poison}}, ptr @await_with_ret_value.resume.0, 0 ; CORO-NEXT: [[TMP2:%.*]] = insertvalue { ptr, ptr } [[TMP1]], ptr [[TOK]], 1 ; CORO-NEXT: ret { ptr, ptr } [[TMP2]] ; ; CLEANED-LABEL: define void @await_with_ret_value( -; CLEANED-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]]) !continuation.registercount [[META1]] !continuation [[META7:![0-9]+]] !continuation.state [[META3]] !continuation.stacksize [[META3]] { +; CLEANED-SAME: i64 [[RETURNADDR:%.*]]) !continuation.registercount [[META1]] !continuation [[META7:![0-9]+]] !continuation.stacksize [[META3]] 
!continuation.state [[META3]] { ; CLEANED-NEXT: AllocaSpillBB: -; CLEANED-NEXT: [[TMP0:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CLEANED-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -; CLEANED-NEXT: [[TMP2:%.*]] = inttoptr i32 [[TMP1]] to ptr addrspace(21) -; CLEANED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP2]], i64 0 -; CLEANED-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[AWAIT_WITH_RET_VALUE_FRAME:%.*]], ptr addrspace(21) [[TMP3]], i32 0, i32 0 -; CLEANED-NEXT: store i64 [[RETURNADDR]], ptr addrspace(21) [[RETURNADDR_SPILL_ADDR]], align 4 -; CLEANED-NEXT: [[TMP4:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CLEANED-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -; CLEANED-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], 8 -; CLEANED-NEXT: store i32 [[TMP6]], ptr [[TMP4]], align 4 -; CLEANED-NEXT: [[TMP7:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CLEANED-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -; CLEANED-NEXT: call void (i64, ...) @continuation.continue(i64 ptrtoint (ptr @async_fun to i64), i32 [[TMP8]], i64 ptrtoint (ptr @await_with_ret_value.resume.0 to i64)), !continuation.registercount [[META1]], !continuation.returnedRegistercount !1 +; CLEANED-NEXT: [[TMP0:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) +; CLEANED-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[AWAIT_WITH_RET_VALUE_FRAME:%.*]], ptr addrspace(32) [[TMP0]], i32 0, i32 0 +; CLEANED-NEXT: store i64 [[RETURNADDR]], ptr addrspace(32) [[RETURNADDR_SPILL_ADDR]], align 4 +; CLEANED-NEXT: [[CONT_STATE_STACK_ALLOC:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) +; CLEANED-NEXT: call void (i64, ...) 
@continuation.continue(i64 ptrtoint (ptr @async_fun to i64), i64 ptrtoint (ptr @await_with_ret_value.resume.0 to i64)), !continuation.registercount [[META1]], !continuation.returnedRegistercount !1 ; CLEANED-NEXT: unreachable ; %tok = call %continuation.token* @async_fun(), !continuation.registercount !1, !continuation.returnedRegistercount !1 @@ -180,7 +154,7 @@ define i32 @await_with_ret_value() !continuation.registercount !1 { define void @wait_await() !continuation.registercount !1 { ; AWAIT-LABEL: define { ptr, ptr } @wait_await( -; AWAIT-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], ptr [[TMP0:%.*]]) !continuation.registercount [[META1]] !continuation [[META7:![0-9]+]] { +; AWAIT-SAME: i64 [[RETURNADDR:%.*]], ptr [[TMP0:%.*]]) !continuation.registercount [[META1]] !continuation [[META7:![0-9]+]] { ; AWAIT-NEXT: [[TMP2:%.*]] = call token @llvm.coro.id.retcon(i32 8, i32 4, ptr [[TMP0]], ptr @continuation.prototype.wait_await, ptr @continuation.malloc, ptr @continuation.free) ; AWAIT-NEXT: [[TMP3:%.*]] = call ptr @llvm.coro.begin(token [[TMP2]], ptr null) ; AWAIT-NEXT: [[TOK:%.*]] = call ptr @async_fun_with_waitmask(i64 -1), !continuation.registercount [[META1]], !continuation.returnedRegistercount !1, !continuation.wait.await [[META3]] @@ -189,31 +163,23 @@ define void @wait_await() !continuation.registercount !1 { ; AWAIT-NEXT: unreachable ; ; CORO-LABEL: define { ptr, ptr } @wait_await( -; CORO-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], ptr [[TMP0:%.*]]) !continuation.registercount [[META1]] !continuation [[META7:![0-9]+]] { +; CORO-SAME: i64 [[RETURNADDR:%.*]], ptr [[TMP0:%.*]]) !continuation.registercount [[META1]] !continuation [[META7:![0-9]+]] { ; CORO-NEXT: AllocaSpillBB: ; CORO-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[WAIT_AWAIT_FRAME:%.*]], ptr [[TMP0]], i32 0, i32 0 ; CORO-NEXT: store i64 [[RETURNADDR]], ptr [[RETURNADDR_SPILL_ADDR]], align 4 ; CORO-NEXT: [[TOK:%.*]] = call ptr @async_fun_with_waitmask(i64 -1), 
!continuation.registercount [[META1]], !continuation.returnedRegistercount !1, !continuation.wait.await [[META3]] -; CORO-NEXT: [[TMP1:%.*]] = insertvalue { ptr, ptr } [[UNDEF_OR_POISON:undef|poison]], ptr @wait_await.resume.0, 0 +; CORO-NEXT: [[TMP1:%.*]] = insertvalue { ptr, ptr } {{undef|poison}}, ptr @wait_await.resume.0, 0 ; CORO-NEXT: [[TMP2:%.*]] = insertvalue { ptr, ptr } [[TMP1]], ptr [[TOK]], 1 ; CORO-NEXT: ret { ptr, ptr } [[TMP2]] ; ; CLEANED-LABEL: define void @wait_await( -; CLEANED-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]]) !continuation.registercount [[META1]] !continuation [[META8:![0-9]+]] !continuation.state [[META3]] !continuation.stacksize [[META3]] { +; CLEANED-SAME: i64 [[RETURNADDR:%.*]]) !continuation.registercount [[META1]] !continuation [[META8:![0-9]+]] !continuation.stacksize [[META3]] !continuation.state [[META3]] { ; CLEANED-NEXT: AllocaSpillBB: -; CLEANED-NEXT: [[TMP0:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CLEANED-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -; CLEANED-NEXT: [[TMP2:%.*]] = inttoptr i32 [[TMP1]] to ptr addrspace(21) -; CLEANED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP2]], i64 0 -; CLEANED-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[WAIT_AWAIT_FRAME:%.*]], ptr addrspace(21) [[TMP3]], i32 0, i32 0 -; CLEANED-NEXT: store i64 [[RETURNADDR]], ptr addrspace(21) [[RETURNADDR_SPILL_ADDR]], align 4 -; CLEANED-NEXT: [[TMP4:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CLEANED-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -; CLEANED-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], 8 -; CLEANED-NEXT: store i32 [[TMP6]], ptr [[TMP4]], align 4 -; CLEANED-NEXT: [[TMP7:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CLEANED-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -; CLEANED-NEXT: call void (i64, i64, ...) 
@continuation.waitContinue(i64 ptrtoint (ptr @async_fun_with_waitmask to i64), i64 -1, i32 [[TMP8]], i64 ptrtoint (ptr @wait_await.resume.0 to i64)), !continuation.registercount [[META1]], !continuation.returnedRegistercount !1 +; CLEANED-NEXT: [[TMP0:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) +; CLEANED-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[WAIT_AWAIT_FRAME:%.*]], ptr addrspace(32) [[TMP0]], i32 0, i32 0 +; CLEANED-NEXT: store i64 [[RETURNADDR]], ptr addrspace(32) [[RETURNADDR_SPILL_ADDR]], align 4 +; CLEANED-NEXT: [[CONT_STATE_STACK_ALLOC:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) +; CLEANED-NEXT: call void (i64, i64, ...) @continuation.waitContinue(i64 ptrtoint (ptr @async_fun_with_waitmask to i64), i64 -1, i64 ptrtoint (ptr @wait_await.resume.0 to i64)), !continuation.registercount [[META1]], !continuation.returnedRegistercount !1 ; CLEANED-NEXT: unreachable ; %tok = call %continuation.token* @async_fun_with_waitmask(i64 -1), !continuation.wait.await !0, !continuation.registercount !1, !continuation.returnedRegistercount !1 diff --git a/shared/continuations/test/dx/lower-rt-pipeline-call-shader.ll b/llvmraytracing/test/dx/lower-rt-pipeline-call-shader.ll similarity index 51% rename from shared/continuations/test/dx/lower-rt-pipeline-call-shader.ll rename to llvmraytracing/test/dx/lower-rt-pipeline-call-shader.ll index d319d92100..93b6619357 100644 --- a/shared/continuations/test/dx/lower-rt-pipeline-call-shader.ll +++ b/llvmraytracing/test/dx/lower-rt-pipeline-call-shader.ll @@ -3,8 +3,12 @@ ; RUN: count 0 < %t0.stderr ; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,remove-types-metadata' -S %s 2> %t1.stderr | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE-CPS %s ; RUN: count 0 < %t1.stderr +; RUN: opt --verify-each 
-passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,cleanup-continuations,lint,register-buffer,lint,remove-types-metadata' -S %s 2> %t2.stderr | FileCheck -check-prefix=REGISTERBUFFER-CPS %s +; RUN: count 0 < %t2.stderr +; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' -S %s 2> %t3.stderr | FileCheck -check-prefix=POSTPROCESS-CPS %s +; RUN: count 0 < %t3.stderr -target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" +target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" %dx.types.Handle = type { i8* } %struct.DispatchSystemData = type { i32 } @@ -17,7 +21,9 @@ target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-i1:32-i8:8-i16:32-i32:32- declare i32 @_cont_GetContinuationStackAddr() -declare !types !13 i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData*) +define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) !types !13 { + ret i32 5 +} declare %struct.DispatchSystemData @_cont_SetupRayGen() @@ -74,8 +80,13 @@ attributes #0 = { nounwind } !18 = !{!"function", !"void", !14, i32 poison} !19 = !{!"function", !"void", i32 poison, i32 poison, !20} !20 = !{i32 0, %struct.TheirParams poison} +; LOWERRAYTRACINGPIPELINE-LABEL: define i32 @_cont_GetLocalRootIndex( +; LOWERRAYTRACINGPIPELINE-SAME: ptr [[DATA:%.*]]) { +; 
LOWERRAYTRACINGPIPELINE-NEXT: ret i32 5 +; +; ; LOWERRAYTRACINGPIPELINE-LABEL: define void @main( -; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META9:![0-9]+]] !continuation.entry [[META16:![0-9]+]] !continuation.registercount [[META9]] !continuation [[META17:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META9:![0-9]+]] !continuation.entry [[META17:![0-9]+]] !continuation.registercount [[META9]] !continuation [[META18:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[PARAMS:%.*]] = alloca [[STRUCT_THEIRPARAMS:%.*]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 @@ -84,7 +95,7 @@ attributes #0 = { nounwind } ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_THEIRPARAMS]], ptr [[PARAMS]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP3]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = call ptr inttoptr (i64 2 to ptr)([[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I]]), !continuation.registercount [[META18:![0-9]+]], !continuation.returnedRegistercount !18 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = call ptr inttoptr (i64 2 to ptr)([[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I]]), !continuation.registercount [[META15:![0-9]+]], !continuation.returnedRegistercount !15 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] @await.struct.DispatchSystemData(ptr [[TMP4]]) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_THEIRPARAMS]] poison, ptr [[PARAMS]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_THEIRPARAMS]], ptr [[PARAMS]], i32 0, i32 0 @@ -95,8 +106,13 @@ 
attributes #0 = { nounwind } ; LOWERRAYTRACINGPIPELINE-NEXT: ret void, !continuation.registercount [[META14:![0-9]+]] ; ; +; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define i32 @_cont_GetLocalRootIndex( +; LOWERRAYTRACINGPIPELINE-CPS-SAME: ptr [[DATA:%.*]]) { +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: ret i32 5 +; +; ; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define void @main( -; LOWERRAYTRACINGPIPELINE-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META9:![0-9]+]] !lgc.cps [[META9]] !continuation [[META16:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META9:![0-9]+]] !lgc.cps [[META9]] !continuation [[META17:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PARAMS:%.*]] = alloca [[STRUCT_THEIRPARAMS:%.*]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 @@ -105,13 +121,69 @@ attributes #0 = { nounwind } ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_THEIRPARAMS]], ptr [[PARAMS]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP3]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP4:%.*]] = call i32 @_cont_GetLocalRootIndex(ptr [[SYSTEM_DATA_ALLOCA]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP5:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] (...) @lgc.cps.await.s_struct.DispatchSystemDatas(i32 2, i32 2, i32 [[TMP4]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP4:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] (...) 
@lgc.cps.await__s_struct.DispatchSystemDatas(i32 2, i32 2, i32 5), !continuation.returnedRegistercount !15, !continuation.registercount [[META15:![0-9]+]] ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_THEIRPARAMS]] poison, ptr [[PARAMS]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_THEIRPARAMS]], ptr [[PARAMS]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP7:%.*]] = load i32, ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP5]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_THEIRPARAMS]], ptr [[PARAMS]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP6:%.*]] = load i32, ptr @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP4]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: ret void ; +; +; REGISTERBUFFER-CPS-LABEL: define i32 @_cont_GetLocalRootIndex( +; REGISTERBUFFER-CPS-SAME: ptr [[DATA:%.*]]) { +; REGISTERBUFFER-CPS-NEXT: ret i32 5 +; +; +; REGISTERBUFFER-CPS-LABEL: define void @main( +; REGISTERBUFFER-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META8:![0-9]+]] !lgc.cps [[META8]] !continuation [[META16:![0-9]+]] { +; REGISTERBUFFER-CPS-NEXT: AllocaSpillBB: +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 +; REGISTERBUFFER-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) +; REGISTERBUFFER-CPS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 
[[DOTFCA_0_EXTRACT]], 0 +; REGISTERBUFFER-CPS-NEXT: store i32 undef, ptr addrspace(20) @PAYLOAD, align 4 +; REGISTERBUFFER-CPS-NEXT: [[TMP1:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference(ptr @main.resume.0) +; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 2, i32 2, {} poison, i32 [[TMP1]], i32 5), !continuation.returnedRegistercount !14, !continuation.registercount [[META14:![0-9]+]] +; REGISTERBUFFER-CPS-NEXT: unreachable +; +; +; REGISTERBUFFER-CPS-LABEL: define dso_local void @main.resume.0( +; REGISTERBUFFER-CPS-SAME: {} [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP3:%.*]]) !lgc.rt.shaderstage [[META8]] !lgc.cps [[META8]] !continuation [[META16]] { +; REGISTERBUFFER-CPS-NEXT: entryresume.0: +; REGISTERBUFFER-CPS-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT1:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP3]], 0 +; REGISTERBUFFER-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) +; REGISTERBUFFER-CPS-NEXT: ret void +; +; +; POSTPROCESS-CPS-LABEL: define i32 @_cont_GetLocalRootIndex( +; POSTPROCESS-CPS-SAME: ptr [[DATA:%.*]]) { +; POSTPROCESS-CPS-NEXT: ret i32 5 +; +; +; POSTPROCESS-CPS-LABEL: define void @main( +; POSTPROCESS-CPS-SAME: {} [[TMP0:%.*]], i32 [[CSPINIT:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]]) !lgc.rt.shaderstage [[META8:![0-9]+]] !lgc.cps [[META8]] !continuation [[META16:![0-9]+]] { +; POSTPROCESS-CPS-NEXT: AllocaSpillBB: +; POSTPROCESS-CPS-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; POSTPROCESS-CPS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA:%.*]] @_cont_SetupRayGen() +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[SYSTEM_DATA]], 0 +; POSTPROCESS-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) +; POSTPROCESS-CPS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue 
[[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT]], 0 +; POSTPROCESS-CPS-NEXT: store i32 undef, ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-CPS-NEXT: [[TMP3:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference(ptr @main.resume.0) +; POSTPROCESS-CPS-NEXT: call void (...) @lgc.cps.jump(i32 2, i32 2, {} poison, i32 [[TMP3]], i32 5), !continuation.returnedRegistercount !14, !continuation.registercount [[META14:![0-9]+]] +; POSTPROCESS-CPS-NEXT: unreachable +; +; +; POSTPROCESS-CPS-LABEL: define dso_local void @main.resume.0( +; POSTPROCESS-CPS-SAME: {} [[TMP0:%.*]], i32 [[CSPINIT:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP3:%.*]]) !lgc.rt.shaderstage [[META8]] !lgc.cps [[META8]] !continuation [[META16]] { +; POSTPROCESS-CPS-NEXT: entryresume.0: +; POSTPROCESS-CPS-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; POSTPROCESS-CPS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT1:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP3]], 0 +; POSTPROCESS-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) +; POSTPROCESS-CPS-NEXT: ret void +; diff --git a/shared/continuations/test/dx/lower-rt-pipeline-intrinsics-hit.ll b/llvmraytracing/test/dx/lower-rt-pipeline-intrinsics-hit.ll similarity index 97% rename from shared/continuations/test/dx/lower-rt-pipeline-intrinsics-hit.ll rename to llvmraytracing/test/dx/lower-rt-pipeline-intrinsics-hit.ll index 408476846d..2635987b31 100644 --- a/shared/continuations/test/dx/lower-rt-pipeline-intrinsics-hit.ll +++ b/llvmraytracing/test/dx/lower-rt-pipeline-intrinsics-hit.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 ; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,remove-types-metadata' -S %s 
2> %t0.stderr | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE %s ; RUN: count 0 < %t0.stderr -; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,save-continuation-state,lint,dxil-cont-post-process,lint,remove-types-metadata' -S %s 2> %t1.stderr | FileCheck -check-prefix=DXILCONTPOSTPROCESS %s +; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' -S %s 2> %t1.stderr | FileCheck -check-prefix=DXILCONTPOSTPROCESS %s ; RUN: count 0 < %t1.stderr -target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" +target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" %dx.types.Handle = type { i8* } %struct.BuiltInTriangleIntersectionAttributes = type { <2 x float> } @@ -190,10 +190,14 @@ define void @RayGen() #3 { ; DXILCONTPOSTPROCESS-LABEL: define void @RayGen( ; DXILCONTPOSTPROCESS-SAME: ) #[[ATTR5:[0-9]+]] !lgc.rt.shaderstage [[META18:![0-9]+]] !continuation [[META27:![0-9]+]] !continuation.entry [[META13:![0-9]+]] !continuation.registercount [[META18]] !continuation.state [[META18]] { ; DXILCONTPOSTPROCESS-NEXT: AllocaSpillBB: +; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; DXILCONTPOSTPROCESS-NEXT: 
[[TMP0:%.*]] = call i32 @_cont_GetContinuationStackAddr() +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP0]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[SYSTEM_DATA:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA:%.*]] @_cont_SetupRayGen() ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[SYSTEM_DATA]], 0 ; DXILCONTPOSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) -; DXILCONTPOSTPROCESS-NEXT: call void @continuation.complete() +; DXILCONTPOSTPROCESS-NEXT: ret void +; DXILCONTPOSTPROCESS: AllocaSpillBB.split: ; DXILCONTPOSTPROCESS-NEXT: unreachable ; ret void @@ -241,18 +245,18 @@ define void @Intersection() #3 { ; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP19]], !continuation.registercount [[META26]] ; ; DXILCONTPOSTPROCESS-LABEL: define void @Intersection( -; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR5]] !lgc.rt.shaderstage [[META28:![0-9]+]] !continuation [[META29:![0-9]+]] !continuation.registercount [[META25:![0-9]+]] !continuation.state [[META30:![0-9]+]] !continuation.stacksize [[META30]] { +; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR5]] !lgc.rt.shaderstage [[META28:![0-9]+]] !continuation [[META29:![0-9]+]] !continuation.registercount [[META25:![0-9]+]] !continuation.stacksize [[META30:![0-9]+]] !continuation.state [[META30]] { ; DXILCONTPOSTPROCESS-NEXT: AllocaSpillBB: ; DXILCONTPOSTPROCESS-NEXT: [[TMP1:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 -; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; DXILCONTPOSTPROCESS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITTRAVERSALDATA]], align 8 -; DXILCONTPOSTPROCESS-NEXT: store [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; 
DXILCONTPOSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: store [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(21) -; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP3]], i64 0 -; DXILCONTPOSTPROCESS-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[INTERSECTION_FRAME:%.*]], ptr addrspace(21) [[TMP4]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: store i64 [[RETURNADDR]], ptr addrspace(21) [[RETURNADDR_SPILL_ADDR]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(21) +; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP4]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: store i64 [[RETURNADDR]], ptr addrspace(21) [[TMP5]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 0, 0, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 0, 1 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 0 @@ -263,18 +267,18 @@ define void @Intersection() #3 { ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 1 ; DXILCONTPOSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = call float @_cont_RayTMin(ptr 
[[TMP5]]) +; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = call float @_cont_RayTMin(ptr [[TMP6]]) ; DXILCONTPOSTPROCESS-NEXT: [[RES_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] poison, float [[DOTFCA_0_1_0_EXTRACT]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[RES_I_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_0_INSERT]], i32 [[DOTFCA_0_1_1_EXTRACT]], 1 ; DXILCONTPOSTPROCESS-NEXT: [[RES_I_FCA_1_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_1_INSERT]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[RES_I_FCA_1_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_1_INSERT]], 1 -; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[RESPTR_I:%.*]] = getelementptr [[STRUCT_TRAVERSALDATA:%.*]], ptr [[TMP8]], i32 0, i32 1 +; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[RESPTR_I:%.*]] = getelementptr [[STRUCT_TRAVERSALDATA:%.*]], ptr [[TMP9]], i32 0, i32 1 ; DXILCONTPOSTPROCESS-NEXT: [[RES_I:%.*]] = load [[STRUCT_HITDATA]], ptr [[RESPTR_I]], align 4 ; DXILCONTPOSTPROCESS-NEXT: store [[STRUCT_HITDATA]] [[RES_I]], ptr [[TMP1]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP9:%.*]] = call i32 @_cont_InstanceID(ptr [[TMP7]], ptr [[TMP1]]) +; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = call i32 @_cont_InstanceID(ptr [[TMP8]], ptr [[TMP1]]) ; 
DXILCONTPOSTPROCESS-NEXT: [[RES_I3_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] poison, float [[DOTFCA_0_1_0_EXTRACT]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[RES_I3_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I3_FCA_0_INSERT]], i32 [[DOTFCA_0_1_1_EXTRACT]], 1 ; DXILCONTPOSTPROCESS-NEXT: [[RES_I3_FCA_1_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I3_FCA_1_INSERT]], 0 @@ -289,12 +293,12 @@ define void @Intersection() #3 { ; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_4_INSERT]], float [[DOTFCA_1_0_EXTRACT]], 1, 0 ; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_0_INSERT]], i32 [[DOTFCA_1_1_EXTRACT]], 1, 1 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> undef, 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], 8 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP11]], ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP13:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @Intersection.resume.0 to i64)) -; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 3, i32 [[TMP12]], i64 [[TMP13]], [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_1_INSERT]], float 4.000000e+00, i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META25]], !continuation.returnedRegistercount !25 +; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 8 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP12]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP13:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP14:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @Intersection.resume.0 to i64)) +; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 3, i32 [[TMP13]], i64 [[TMP14]], [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_1_INSERT]], float 4.000000e+00, i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META25]], !continuation.returnedRegistercount !25 ; DXILCONTPOSTPROCESS-NEXT: unreachable ; %1 = call float @dx.op.rayTMin.f32(i32 153) diff --git a/shared/continuations/test/dx/lower-rt-pipeline-intrinsics.ll b/llvmraytracing/test/dx/lower-rt-pipeline-intrinsics.ll similarity index 99% rename from shared/continuations/test/dx/lower-rt-pipeline-intrinsics.ll rename to llvmraytracing/test/dx/lower-rt-pipeline-intrinsics.ll index cebbea4656..116424b268 100644 --- a/shared/continuations/test/dx/lower-rt-pipeline-intrinsics.ll +++ b/llvmraytracing/test/dx/lower-rt-pipeline-intrinsics.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function ClosestHit --version 3 ; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,remove-types-metadata' -S %s 2> %t0.stderr | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE %s ; RUN: count 0 < 
%t0.stderr -; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,save-continuation-state,lint,dxil-cont-post-process,lint,remove-types-metadata' -S %s 2> %t1.stderr | FileCheck -check-prefix=DXILCONTPOSTPROCESS %s +; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' -S %s 2> %t1.stderr | FileCheck -check-prefix=DXILCONTPOSTPROCESS %s ; RUN: count 0 < %t1.stderr -target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" +target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" %dx.types.Handle = type { i8* } %struct.BuiltInTriangleIntersectionAttributes = type { <2 x float> } @@ -228,10 +228,10 @@ define void @ClosestHit(%struct.RayPayload* %0, %struct.BuiltInTriangleIntersect ; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = alloca [[STRUCT_HITDATA]], align 8 ; DXILCONTPOSTPROCESS-NEXT: [[TMP9:%.*]] = alloca [[STRUCT_HITDATA]], align 8 ; DXILCONTPOSTPROCESS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 -; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; DXILCONTPOSTPROCESS-NEXT: [[SYSTEM_DATA_ALLOCA1:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 -; DXILCONTPOSTPROCESS-NEXT: store 
[[STRUCT_SYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA1]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; DXILCONTPOSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: store [[STRUCT_SYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA1]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 0, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 ; DXILCONTPOSTPROCESS-NEXT: store <3 x i32> [[DOTFCA_0_0_EXTRACT]], ptr [[DOTFCA_0_0_GEP]], align 4 diff --git a/llvmraytracing/test/dx/lower-rt-pipeline-large-payload.ll b/llvmraytracing/test/dx/lower-rt-pipeline-large-payload.ll new file mode 100644 index 0000000000..1d354802ae --- /dev/null +++ b/llvmraytracing/test/dx/lower-rt-pipeline-large-payload.ll @@ -0,0 +1,743 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 3 +; Test handling of large payloads and payload spilling. +; We set the max number of payload registers to 2, so relatively small payloads need to spill already. +; This results in a bit nicer result IR, containing less "spam" copying payload fields around. +; We also set a max hit attribute size ensuring there is no need for hit attribute storage in the payload. 
+; RUN: grep -v lgc.cps.module %s | opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,remove-types-metadata' -S 2> %t0.stderr | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE %s +; RUN: count 0 < %t0.stderr +; RUN: grep -v lgc.cps.module %s | opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,remove-types-metadata' -S 2> %t1.stderr | FileCheck -check-prefix=CLEANUP %s +; RUN: count 0 < %t1.stderr +; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,cleanup-continuations,lint,remove-types-metadata' -S %s 2> %t2.stderr | FileCheck -check-prefix=CLEANUP-CPS %s +; RUN: count 0 < %t2.stderr +; RUN: grep -v lgc.cps.module %s | opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' -S 2> %t3.stderr | FileCheck -check-prefix=DXILCONTPOSTPROCESS %s +; RUN: count 0 < %t3.stderr + +target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" + +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +; Doesn't need to spill: +%struct.SmallPayload = type { [1 x i32] } +; These two need to spill: +%struct.MediumPayload = type { [3 x i32] } +%struct.LargePayload = type { [5 x i32] } +%struct.DispatchSystemData = type { <3 x i32> } +%struct.TraversalData = type { %struct.SystemData, %struct.HitData, <3 x float>, <3 
x float>, float, i64 }
+%struct.SystemData = type { %struct.DispatchSystemData }
+%struct.HitData = type { <3 x float>, <3 x float>, float, i32 }
+%struct.AnyHitTraversalData = type { %struct.TraversalData, %struct.HitData }
+%struct.BuiltInTriangleIntersectionAttributes = type { <2 x float> }
+
+@"\01?Scene@@3URaytracingAccelerationStructure@@A" = external constant %dx.types.Handle, align 4
+
+declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #3
+declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) #4 ; NOTE(review): attribute group #4 does not appear to be defined in this file (only #0-#3 are) - confirm this is intended
+declare !types !200 void @dx.op.traceRay.struct.SmallPayload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.SmallPayload*)
+declare !types !201 void @dx.op.traceRay.struct.MediumPayload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.MediumPayload*)
+declare !types !202 void @dx.op.traceRay.struct.LargePayload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.LargePayload*)
+
+define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, float %6, float %7, float %8, float %9, float %10, float %11, float %12, float %13) #1 !types !203 { ; Test implementation of TraceRay: wraps the dispatch system data from %data in traversal data, awaits traversal, and writes the returned system data back to %data. Arguments %0-%13 are unused.
+  %dis_data = load %struct.DispatchSystemData, %struct.DispatchSystemData* %data, align 4
+  %sys_data = insertvalue %struct.SystemData undef, %struct.DispatchSystemData %dis_data, 0
+  %trav_data = insertvalue %struct.TraversalData undef, %struct.SystemData %sys_data, 0 ; all other traversal fields stay undef
+  %trav_data2 = insertvalue %struct.TraversalData %trav_data, i64 -1, 5 ; index 5 is the trailing i64 member of %struct.TraversalData
+  %newdata = call %struct.DispatchSystemData @_AmdAwaitTraversal(i64 4, %struct.TraversalData %trav_data2) ; the expected output in this file shows this await as a call through inttoptr (i64 4 to ptr)
+  store %struct.DispatchSystemData %newdata, %struct.DispatchSystemData* %data, align 4
+  call void @_AmdRestoreSystemData(%struct.DispatchSystemData* %data)
+  ret void
+}
+
+define void @Miss(%struct.SmallPayload* noalias nocapture %outerpayload) !types !204 !lgc.rt.attribute.size !32 { ; Miss shader fixture: traces one ray for each payload size (small, medium, large)
+  %p1 = alloca %struct.SmallPayload
+  %p2 = alloca %struct.MediumPayload
+  %p3 = alloca %struct.LargePayload
+  ; Avoid undefs being written to payload registers
+  ; caused by uninitialized payloads.
+  store %struct.SmallPayload zeroinitializer, %struct.SmallPayload* %p1
+  store %struct.MediumPayload zeroinitializer, %struct.MediumPayload* %p2
+  store %struct.LargePayload zeroinitializer, %struct.LargePayload* %p3
+
+  %t1 = load %dx.types.Handle, %dx.types.Handle* @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4
+  %t2 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %t1)
+  %t3 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %t2, %dx.types.ResourceProperties { i32 16, i32 0 })
+
+  call void @dx.op.traceRay.struct.SmallPayload(i32 157, %dx.types.Handle %t3, i32 16, i32 -1, i32 0, i32 1, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0x3F50624DE0000000, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+04, %struct.SmallPayload* nonnull %p1) ; [1 x i32] payload: doesn't need to spill
+  call void @dx.op.traceRay.struct.MediumPayload(i32 157, %dx.types.Handle %t3, i32 16, i32 -1, i32 0, i32 1, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0x3F50624DE0000000, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+04, %struct.MediumPayload* nonnull %p2) ; [3 x i32] payload: needs to spill
+  call void @dx.op.traceRay.struct.LargePayload(i32 157, %dx.types.Handle %t3, i32 16, i32 -1, i32 0, i32 1, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0x3F50624DE0000000, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+04, %struct.LargePayload* nonnull %p3) ; [5 x i32] payload: needs to spill
+  ret void
+}
+
+; Function Attrs: alwaysinline
+declare %struct.DispatchSystemData @_cont_SetupRayGen() #1
+
+; Function Attrs: alwaysinline
+declare
%struct.DispatchSystemData @_AmdAwaitTraversal(i64, %struct.TraversalData) #1
+
+; Function Attrs: alwaysinline
+declare %struct.DispatchSystemData @_AmdAwaitShader(i64, %struct.DispatchSystemData) #1
+
+; Function Attrs: alwaysinline
+declare %struct.AnyHitTraversalData @_AmdAwaitAnyHit(i64, %struct.AnyHitTraversalData, float, i32) #1
+
+; Function Attrs: alwaysinline
+declare !types !19 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData*) #1
+
+; Function Attrs: alwaysinline
+declare !types !21 void @_cont_SetTriangleHitAttributes(%struct.SystemData*, %struct.BuiltInTriangleIntersectionAttributes) #1
+
+; Function Attrs: alwaysinline
+declare !types !22 i1 @_cont_IsEndSearch(%struct.TraversalData*) #1
+
+; Function Attrs: nounwind memory(read)
+declare !types !24 i32 @_cont_HitKind(%struct.SystemData* nocapture readnone, %struct.HitData*) #2
+
+; Function Attrs: nounwind memory(none)
+declare !types !26 void @_AmdRestoreSystemData(%struct.DispatchSystemData*) #3
+
+; Function Attrs: nounwind memory(none)
+declare !types !28 void @_AmdRestoreSystemDataAnyHit(%struct.AnyHitTraversalData*) #3
+
+; Function Attrs: alwaysinline
+define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) #1 !types !30 { ; Test stub: ignores %data and always returns the constant 5
+  ret i32 5
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { alwaysinline }
+attributes #2 = { nounwind memory(read) }
+attributes #3 = { nounwind memory(none) }
+
+!llvm.ident = !{!0}
+!dx.version = !{!1}
+!dx.valver = !{!1}
+!dx.shaderModel = !{!2}
+!dx.typeAnnotations = !{!3}
+!dx.entryPoints = !{!12, !14}
+!continuation.maxPayloadRegisterCount = !{!31}
+!lgc.cps.module = !{}
+
+!0 = !{!"dxcoob 2019.05.00"}
+!1 = !{i32 1, i32 7}
+!2 = !{!"lib", i32 6, i32 7}
+!3 = !{i32 1, void (%struct.SmallPayload*)* @Miss, !4}
+!4 = !{!5, !7}
+!5 = !{i32 1, !6, !6}
+!6 = !{}
+!7 = !{i32 2, !6, !6}
+!9 = !{!10, !11, !11}
+!10 = !{i32 0, i32 259}
+!11 = !{i32 0, i32 513}
+!12 = !{null, !"", null, null, !13}
+!13 = !{i32
0, i64 32} +!14 = !{void (%struct.SmallPayload*)* @Miss, !"Miss", null, null, !15} +!15 = !{i32 8, i32 11, i32 6, i32 24, i32 5, !16} +!16 = !{i32 0} +!19 = !{!"function", %struct.BuiltInTriangleIntersectionAttributes poison, !20} +!20 = !{i32 0, %struct.SystemData poison} +!21 = !{!"function", !"void", !20, %struct.BuiltInTriangleIntersectionAttributes poison} +!22 = !{!"function", i1 poison, !23} +!23 = !{i32 0, %struct.TraversalData poison} +!24 = !{!"function", i32 poison, !20, !25} +!25 = !{i32 0, %struct.HitData poison} +!26 = !{!"function", !"void", !27} +!27 = !{i32 0, %struct.DispatchSystemData poison} +!28 = !{!"function", !"void", !29} +!29 = !{i32 0, %struct.AnyHitTraversalData poison} +!30 = !{!"function", i32 poison, !27} +!31 = !{i32 2} +!32 = !{i32 8} + +!100 = !{i32 0, %struct.SmallPayload poison} +!101 = !{i32 0, %struct.MediumPayload poison} +!102 = !{i32 0, %struct.LargePayload poison} +!103 = !{i32 0, %struct.DispatchSystemData poison} +!200 = !{!"function", !"void", i32 poison, %dx.types.Handle poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, !100} +!201 = !{!"function", !"void", i32 poison, %dx.types.Handle poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, !101} +!202 = !{!"function", !"void", i32 poison, %dx.types.Handle poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, !102} +!203 = !{!"function", !"void", !103, i64 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison} +!204 = !{!"function", !"void", !100} +; 
LOWERRAYTRACINGPIPELINE-LABEL: define %struct.DispatchSystemData @Miss( +; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.attribute.size [[META16:![0-9]+]] !lgc.rt.shaderstage [[META17:![0-9]+]] !continuation.registercount [[META18:![0-9]+]] !continuation [[META19:![0-9]+]] !continuation.stacksize [[META20:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-NEXT: [[P1:%.*]] = alloca [[STRUCT_SMALLPAYLOAD:%.*]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[P2:%.*]] = alloca [[STRUCT_MEDIUMPAYLOAD:%.*]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[P3:%.*]] = alloca [[STRUCT_LARGEPAYLOAD:%.*]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[PAYLOAD_SPILL_ALLOCA:%.*]] = alloca [4 x i32], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_SMALLPAYLOAD]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_SYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_SMALLPAYLOAD]], ptr [[TMP2]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = load [1 x i32], ptr @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store [1 x i32] [[TMP5]], ptr [[TMP4]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) +; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_SMALLPAYLOAD]] zeroinitializer, ptr [[P1]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_MEDIUMPAYLOAD]] zeroinitializer, ptr [[P2]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_LARGEPAYLOAD]] zeroinitializer, ptr [[P3]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[T1:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[T2:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[T1]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[T3:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[T2]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[T3]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[DIS_DATA_I:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP7]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I]], 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TRAV_DATA2_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], i64 -1, 5 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_SMALLPAYLOAD]], ptr [[P1]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = load [1 x i32], ptr [[TMP8]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store [1 x i32] [[TMP9]], ptr @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: 
[[TMP10:%.*]] = call ptr inttoptr (i64 4 to ptr)([[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]]), !continuation.registercount [[META18]], !continuation.returnedRegistercount !18 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] @await.struct.DispatchSystemData(ptr [[TMP10]]) +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_SMALLPAYLOAD]] poison, ptr [[P1]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_SMALLPAYLOAD]], ptr [[P1]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = load [1 x i32], ptr @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store [1 x i32] [[TMP13]], ptr [[TMP12]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP11]], ptr [[TMP7]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; LOWERRAYTRACINGPIPELINE-NEXT: br label [[DOTSPLIT10:%.*]] +; LOWERRAYTRACINGPIPELINE: .split10: +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[T3]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[DIS_DATA_I1:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP15]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[SYS_DATA_I2:%.*]] = insertvalue [[STRUCT_SYSTEMDATA]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I1]], 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TRAV_DATA_I3:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I2]], 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TRAV_DATA2_I4:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I3]], i64 -1, 5 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[PAYLOAD_SPILL_ALLOCA]] to i32 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP16]], ptr @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_MEDIUMPAYLOAD]], ptr [[P2]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP17]], i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[TMP18]], i64 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP20]], ptr getelementptr inbounds ([[STRUCT_MEDIUMPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 1), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[TMP18]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP22]], ptr getelementptr ([[STRUCT_MEDIUMPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 2), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[TMP18]], i64 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP24]], ptr getelementptr ([[STRUCT_MEDIUMPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 3), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = call ptr inttoptr (i64 4 to ptr)([[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I4]]), !continuation.registercount [[META14:![0-9]+]], !continuation.returnedRegistercount !14 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] @await.struct.DispatchSystemData(ptr [[TMP25]]) +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_MEDIUMPAYLOAD]] poison, ptr [[P2]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_MEDIUMPAYLOAD]], ptr [[P2]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP27]], i32 0 +; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[TMP28]], i64 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP30:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_MEDIUMPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 1), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP30]], ptr [[TMP29]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP31:%.*]] = getelementptr i32, ptr [[TMP28]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP32:%.*]] = load i32, ptr getelementptr ([[STRUCT_MEDIUMPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 2), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP32]], ptr [[TMP31]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP33:%.*]] = getelementptr i32, ptr [[TMP28]], i64 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP34:%.*]] = load i32, ptr getelementptr ([[STRUCT_MEDIUMPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 3), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP26]], ptr [[TMP15]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; LOWERRAYTRACINGPIPELINE-NEXT: br label [[DOTSPLIT9:%.*]] +; LOWERRAYTRACINGPIPELINE: .split9: +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP35:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[T3]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[DIS_DATA_I5:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP36]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[SYS_DATA_I6:%.*]] = insertvalue [[STRUCT_SYSTEMDATA]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I5]], 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TRAV_DATA_I7:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I6]], 0 +; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TRAV_DATA2_I8:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I7]], i64 -1, 5 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP37:%.*]] = ptrtoint ptr [[PAYLOAD_SPILL_ALLOCA]] to i32 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP37]], ptr @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_LARGEPAYLOAD]], ptr [[P3]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP39:%.*]] = getelementptr i32, ptr [[TMP38]], i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP40:%.*]] = getelementptr i32, ptr [[TMP39]], i64 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP41]], ptr getelementptr inbounds ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 1), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP42:%.*]] = getelementptr i32, ptr [[TMP39]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP43]], ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 2), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP44:%.*]] = getelementptr i32, ptr [[TMP39]], i64 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP44]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP45]], ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 3), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP46:%.*]] = getelementptr i32, ptr [[TMP39]], i64 3 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP46]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP47]], ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 4), 
align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP48:%.*]] = getelementptr i32, ptr [[TMP39]], i64 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP49]], ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 5), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP50:%.*]] = call ptr inttoptr (i64 4 to ptr)([[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I8]]), !continuation.registercount [[META14]], !continuation.returnedRegistercount !14 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP51:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] @await.struct.DispatchSystemData(ptr [[TMP50]]) +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_LARGEPAYLOAD]] poison, ptr [[P3]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT_LARGEPAYLOAD]], ptr [[P3]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP53:%.*]] = getelementptr i32, ptr [[TMP52]], i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP54:%.*]] = getelementptr i32, ptr [[TMP53]], i64 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP55:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 1), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP55]], ptr [[TMP54]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP56:%.*]] = getelementptr i32, ptr [[TMP53]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP57:%.*]] = load i32, ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 2), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP57]], ptr [[TMP56]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP58:%.*]] = getelementptr i32, ptr [[TMP53]], i64 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP59:%.*]] = load i32, ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 3), align 4 +; 
LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP59]], ptr [[TMP58]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP60:%.*]] = getelementptr i32, ptr [[TMP53]], i64 3 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP61:%.*]] = load i32, ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 4), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP61]], ptr [[TMP60]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP62:%.*]] = getelementptr i32, ptr [[TMP53]], i64 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP63:%.*]] = load i32, ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 5), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP63]], ptr [[TMP62]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP51]], ptr [[TMP36]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; LOWERRAYTRACINGPIPELINE-NEXT: br label [[DOTSPLIT:%.*]] +; LOWERRAYTRACINGPIPELINE: .split: +; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT_SMALLPAYLOAD]], ptr [[TMP2]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP65:%.*]] = load [1 x i32], ptr [[TMP64]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store [1 x i32] [[TMP65]], ptr @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP67:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP66]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP67]], !continuation.registercount [[META18]] +; +; +; LOWERRAYTRACINGPIPELINE-LABEL: define i32 @_cont_GetLocalRootIndex( +; LOWERRAYTRACINGPIPELINE-SAME: ptr [[DATA:%.*]]) #[[ATTR1:[0-9]+]] { +; LOWERRAYTRACINGPIPELINE-NEXT: ret i32 5 +; +; +; CLEANUP-LABEL: define void @Miss( +; CLEANUP-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.attribute.size [[META16:![0-9]+]] !lgc.rt.shaderstage [[META17:![0-9]+]] !continuation.registercount [[META18:![0-9]+]] !continuation [[META19:![0-9]+]] !continuation.stacksize [[META20:![0-9]+]] !continuation.state [[META21:![0-9]+]] { +; CLEANUP-NEXT: AllocaSpillBB: +; CLEANUP-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) +; CLEANUP-NEXT: [[PAYLOAD_SPILL_ALLOCA:%.*]] = getelementptr inbounds [[MISS_FRAME:%.*]], ptr addrspace(32) [[TMP1]], i32 0, i32 0 +; CLEANUP-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[MISS_FRAME]], ptr addrspace(32) [[TMP1]], i32 0, i32 1 +; CLEANUP-NEXT: store i64 [[RETURNADDR]], ptr addrspace(32) [[RETURNADDR_SPILL_ADDR]], align 4 +; CLEANUP-NEXT: [[DOTFCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 0, 0 +; CLEANUP-NEXT: [[TMP2:%.*]] = load [1 x i32], ptr @PAYLOAD, align 4 +; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [1 x i32] [[TMP2]], 0 +; CLEANUP-NEXT: 
[[DOTFCA_0_EXTRACT_SPILL_ADDR:%.*]] = getelementptr inbounds [[MISS_FRAME]], ptr addrspace(32) [[TMP1]], i32 0, i32 2 +; CLEANUP-NEXT: store i32 [[DOTFCA_0_EXTRACT]], ptr addrspace(32) [[DOTFCA_0_EXTRACT_SPILL_ADDR]], align 4 +; CLEANUP-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) +; CLEANUP-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; CLEANUP-NEXT: [[T1:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 +; CLEANUP-NEXT: [[T2:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[T1]]) +; CLEANUP-NEXT: [[T3:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[T2]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; CLEANUP-NEXT: [[TMP3:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[T3]]) +; CLEANUP-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison, <3 x i32> [[DOTFCA_0_0_EXTRACT]], 0 +; CLEANUP-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]], 0 +; CLEANUP-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 +; CLEANUP-NEXT: [[TRAV_DATA2_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], i64 -1, 5 +; CLEANUP-NEXT: [[DOTFCA_0_INSERT30:%.*]] = insertvalue [1 x i32] poison, i32 0, 0 +; CLEANUP-NEXT: store [1 x i32] [[DOTFCA_0_INSERT30]], ptr @PAYLOAD, align 4 +; CLEANUP-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 28) +; CLEANUP-NEXT: call void (i64, ...) 
@continuation.continue(i64 4, i64 ptrtoint (ptr @Miss.resume.0 to i64), [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]]), !continuation.registercount [[META18]], !continuation.returnedRegistercount !18 +; CLEANUP-NEXT: unreachable +; +; +; CLEANUP-LABEL: define dso_local void @Miss.resume.0( +; CLEANUP-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.attribute.size [[META16]] !lgc.rt.shaderstage [[META17]] !continuation.registercount [[META18]] !continuation [[META19]] { +; CLEANUP-NEXT: entryresume.0: +; CLEANUP-NEXT: call void @lgc.cps.free(i32 28) +; CLEANUP-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) +; CLEANUP-NEXT: [[PAYLOAD_SPILL_ALLOCA:%.*]] = getelementptr inbounds [[MISS_FRAME:%.*]], ptr addrspace(32) [[TMP1]], i32 0, i32 0 +; CLEANUP-NEXT: [[TMP2:%.*]] = load [1 x i32], ptr @PAYLOAD, align 4 +; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT32:%.*]] = extractvalue [1 x i32] [[TMP2]], 0 +; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT13:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 +; CLEANUP-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; CLEANUP-NEXT: [[T110:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 +; CLEANUP-NEXT: [[T29:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[T110]]) +; CLEANUP-NEXT: [[T38:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[T29]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; CLEANUP-NEXT: [[TMP3:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[T38]]) +; CLEANUP-NEXT: [[DIS_DATA_I1_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT13]], 0 +; CLEANUP-NEXT: [[SYS_DATA_I2:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I1_FCA_0_INSERT]], 0 +; CLEANUP-NEXT: [[TRAV_DATA_I3:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, 
[[STRUCT_SYSTEMDATA]] [[SYS_DATA_I2]], 0 +; CLEANUP-NEXT: [[TRAV_DATA2_I4:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I3]], i64 -1, 5 +; CLEANUP-NEXT: [[TMP4:%.*]] = ptrtoint ptr addrspace(32) [[PAYLOAD_SPILL_ALLOCA]] to i32 +; CLEANUP-NEXT: store i32 [[TMP4]], ptr @PAYLOAD, align 4 +; CLEANUP-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) +; CLEANUP-NEXT: store i32 0, ptr getelementptr inbounds ([[STRUCT_MEDIUMPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 1), align 4 +; CLEANUP-NEXT: store i32 0, ptr getelementptr ([[STRUCT_MEDIUMPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 2), align 4 +; CLEANUP-NEXT: store i32 0, ptr getelementptr ([[STRUCT_MEDIUMPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 3), align 4 +; CLEANUP-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 28) +; CLEANUP-NEXT: call void (i64, ...) @continuation.continue(i64 4, i64 ptrtoint (ptr @Miss.resume.1 to i64), [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I4]]), !continuation.registercount [[META14:![0-9]+]], !continuation.returnedRegistercount !14 +; CLEANUP-NEXT: unreachable +; +; +; CLEANUP-LABEL: define dso_local void @Miss.resume.1( +; CLEANUP-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.attribute.size [[META16]] !lgc.rt.shaderstage [[META17]] !continuation.registercount [[META14]] !continuation [[META19]] { +; CLEANUP-NEXT: entryresume.1: +; CLEANUP-NEXT: call void @lgc.cps.free(i32 28) +; CLEANUP-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) +; CLEANUP-NEXT: [[PAYLOAD_SPILL_ALLOCA:%.*]] = getelementptr inbounds [[MISS_FRAME:%.*]], ptr addrspace(32) [[TMP1]], i32 0, i32 0 +; CLEANUP-NEXT: [[TMP2:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_MEDIUMPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 1), align 4 +; CLEANUP-NEXT: [[TMP3:%.*]] = load i32, 
ptr getelementptr ([[STRUCT_MEDIUMPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 2), align 4 +; CLEANUP-NEXT: [[TMP4:%.*]] = load i32, ptr getelementptr ([[STRUCT_MEDIUMPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 3), align 4 +; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT15:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 +; CLEANUP-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; CLEANUP-NEXT: [[T17:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 +; CLEANUP-NEXT: [[T26:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[T17]]) +; CLEANUP-NEXT: [[T35:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[T26]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; CLEANUP-NEXT: [[TMP5:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[T35]]) +; CLEANUP-NEXT: [[DIS_DATA_I5_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT15]], 0 +; CLEANUP-NEXT: [[SYS_DATA_I6:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I5_FCA_0_INSERT]], 0 +; CLEANUP-NEXT: [[TRAV_DATA_I7:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I6]], 0 +; CLEANUP-NEXT: [[TRAV_DATA2_I8:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I7]], i64 -1, 5 +; CLEANUP-NEXT: [[TMP6:%.*]] = ptrtoint ptr addrspace(32) [[PAYLOAD_SPILL_ALLOCA]] to i32 +; CLEANUP-NEXT: store i32 [[TMP6]], ptr @PAYLOAD, align 4 +; CLEANUP-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) +; CLEANUP-NEXT: store i32 0, ptr getelementptr inbounds ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 1), align 4 +; CLEANUP-NEXT: store i32 0, ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 2), align 4 +; CLEANUP-NEXT: store i32 0, ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 3), align 4 +; CLEANUP-NEXT: store i32 0, ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 4), align 4 +; CLEANUP-NEXT: store i32 0, ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 5), align 4 +; CLEANUP-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 28) +; CLEANUP-NEXT: call void (i64, ...) @continuation.continue(i64 4, i64 ptrtoint (ptr @Miss.resume.2 to i64), [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I8]]), !continuation.registercount [[META14]], !continuation.returnedRegistercount !14 +; CLEANUP-NEXT: unreachable +; +; +; CLEANUP-LABEL: define dso_local void @Miss.resume.2( +; CLEANUP-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.attribute.size [[META16]] !lgc.rt.shaderstage [[META17]] !continuation.registercount [[META14]] !continuation [[META19]] { +; CLEANUP-NEXT: entryresume.2: +; CLEANUP-NEXT: call void @lgc.cps.free(i32 28) +; CLEANUP-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) +; CLEANUP-NEXT: [[PAYLOAD_SPILL_ALLOCA:%.*]] = getelementptr inbounds [[MISS_FRAME:%.*]], ptr addrspace(32) [[TMP1]], i32 0, i32 0 +; CLEANUP-NEXT: [[TMP2:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 1), align 4 +; CLEANUP-NEXT: [[TMP3:%.*]] = load i32, ptr getelementptr 
([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 2), align 4 +; CLEANUP-NEXT: [[TMP4:%.*]] = load i32, ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 3), align 4 +; CLEANUP-NEXT: [[TMP5:%.*]] = load i32, ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 4), align 4 +; CLEANUP-NEXT: [[TMP6:%.*]] = load i32, ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 5), align 4 +; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT17:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 +; CLEANUP-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT_RELOAD_ADDR:%.*]] = getelementptr inbounds [[MISS_FRAME]], ptr addrspace(32) [[TMP1]], i32 0, i32 2 +; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT_RELOAD:%.*]] = load i32, ptr addrspace(32) [[DOTFCA_0_EXTRACT_RELOAD_ADDR]], align 4 +; CLEANUP-NEXT: [[RETURNADDR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[MISS_FRAME]], ptr addrspace(32) [[TMP1]], i32 0, i32 1 +; CLEANUP-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(32) [[RETURNADDR_RELOAD_ADDR]], align 4 +; CLEANUP-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) +; CLEANUP-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [1 x i32] poison, i32 [[DOTFCA_0_EXTRACT_RELOAD]], 0 +; CLEANUP-NEXT: store [1 x i32] [[DOTFCA_0_INSERT]], ptr @PAYLOAD, align 4 +; CLEANUP-NEXT: [[DOTFCA_0_INSERT12:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT17]], 0 +; CLEANUP-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR_RELOAD]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT12]]), !continuation.registercount [[META18]] +; CLEANUP-NEXT: unreachable +; +; +; CLEANUP-LABEL: define i32 @_cont_GetLocalRootIndex( +; CLEANUP-SAME: ptr [[DATA:%.*]]) #[[ATTR1:[0-9]+]] { +; CLEANUP-NEXT: ret i32 5 +; +; +; CLEANUP-CPS-LABEL: define void @Miss( +; CLEANUP-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.attribute.size [[META16:![0-9]+]] !lgc.rt.shaderstage [[META17:![0-9]+]] !lgc.cps [[META18:![0-9]+]] !continuation [[META19:![0-9]+]] !continuation.stacksize [[META20:![0-9]+]] { +; CLEANUP-CPS-NEXT: AllocaSpillBB: +; CLEANUP-CPS-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 20) +; CLEANUP-CPS-NEXT: [[PAYLOAD_SPILL_ALLOCA:%.*]] = getelementptr inbounds [[MISS_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[RETURN_ADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[MISS_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 1 +; CLEANUP-CPS-NEXT: store i32 [[RETURN_ADDR]], ptr addrspace(32) [[RETURN_ADDR_SPILL_ADDR]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 0, 0 +; CLEANUP-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) +; CLEANUP-CPS-NEXT: [[T1:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 +; CLEANUP-CPS-NEXT: [[T2:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[T1]]) +; CLEANUP-CPS-NEXT: [[T3:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[T2]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; CLEANUP-CPS-NEXT: [[TMP1:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[T3]]) +; CLEANUP-CPS-NEXT: 
[[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison, <3 x i32> [[DOTFCA_0_0_EXTRACT]], 0 +; CLEANUP-CPS-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]], 0 +; CLEANUP-CPS-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 +; CLEANUP-CPS-NEXT: [[TRAV_DATA2_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], i64 -1, 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT28:%.*]] = insertvalue [1 x i32] poison, i32 0, 0 +; CLEANUP-CPS-NEXT: store [1 x i32] [[DOTFCA_0_INSERT28]], ptr @PAYLOAD, align 4 +; CLEANUP-CPS-NEXT: [[TMP2:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference(ptr @Miss.resume.0) +; CLEANUP-CPS-NEXT: call void (...) @lgc.cps.jump(i32 4, i32 4, {} poison, i32 [[TMP2]], i32 5), !continuation.returnedRegistercount !18, !continuation.registercount [[META18]] +; CLEANUP-CPS-NEXT: unreachable +; +; +; CLEANUP-CPS-LABEL: define dso_local void @Miss.resume.0( +; CLEANUP-CPS-SAME: {} [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP3:%.*]]) !lgc.rt.attribute.size [[META16]] !lgc.rt.shaderstage [[META17]] !lgc.cps [[META18]] !continuation [[META19]] !continuation.stacksize [[META20]] { +; CLEANUP-CPS-NEXT: entryresume.0: +; CLEANUP-CPS-NEXT: [[TMP4:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 20) +; CLEANUP-CPS-NEXT: [[PAYLOAD_SPILL_ALLOCA:%.*]] = getelementptr inbounds [[MISS_FRAME:%.*]], ptr addrspace(32) [[TMP4]], i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[TMP5:%.*]] = load [1 x i32], ptr @PAYLOAD, align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_EXTRACT30:%.*]] = extractvalue [1 x i32] [[TMP5]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP3]], 0 +; CLEANUP-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; CLEANUP-CPS-NEXT: [[T110:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr 
@"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 +; CLEANUP-CPS-NEXT: [[T29:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[T110]]) +; CLEANUP-CPS-NEXT: [[T38:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[T29]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; CLEANUP-CPS-NEXT: [[TMP6:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[T38]]) +; CLEANUP-CPS-NEXT: [[DIS_DATA_I1_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT]], 0 +; CLEANUP-CPS-NEXT: [[SYS_DATA_I2:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I1_FCA_0_INSERT]], 0 +; CLEANUP-CPS-NEXT: [[TRAV_DATA_I3:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I2]], 0 +; CLEANUP-CPS-NEXT: [[TRAV_DATA2_I4:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I3]], i64 -1, 5 +; CLEANUP-CPS-NEXT: [[TMP7:%.*]] = ptrtoint ptr addrspace(32) [[PAYLOAD_SPILL_ALLOCA]] to i32 +; CLEANUP-CPS-NEXT: store i32 [[TMP7]], ptr @PAYLOAD, align 4 +; CLEANUP-CPS-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) +; CLEANUP-CPS-NEXT: store i32 0, ptr getelementptr inbounds ([[STRUCT_MEDIUMPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 1), align 4 +; CLEANUP-CPS-NEXT: store i32 0, ptr getelementptr ([[STRUCT_MEDIUMPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 2), align 4 +; CLEANUP-CPS-NEXT: store i32 0, ptr getelementptr ([[STRUCT_MEDIUMPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 3), align 4 +; CLEANUP-CPS-NEXT: [[TMP8:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference(ptr @Miss.resume.1) +; CLEANUP-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 4, i32 4, {} poison, i32 [[TMP8]], i32 5), !continuation.returnedRegistercount !14, !continuation.registercount [[META14:![0-9]+]] +; CLEANUP-CPS-NEXT: unreachable +; +; +; CLEANUP-CPS-LABEL: define dso_local void @Miss.resume.1( +; CLEANUP-CPS-SAME: {} [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP3:%.*]]) !lgc.rt.attribute.size [[META16]] !lgc.rt.shaderstage [[META17]] !lgc.cps [[META18]] !continuation [[META19]] !continuation.stacksize [[META20]] { +; CLEANUP-CPS-NEXT: entryresume.1: +; CLEANUP-CPS-NEXT: [[TMP4:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 20) +; CLEANUP-CPS-NEXT: [[PAYLOAD_SPILL_ALLOCA:%.*]] = getelementptr inbounds [[MISS_FRAME:%.*]], ptr addrspace(32) [[TMP4]], i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[TMP5:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_MEDIUMPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 1), align 4 +; CLEANUP-CPS-NEXT: [[TMP6:%.*]] = load i32, ptr getelementptr ([[STRUCT_MEDIUMPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 2), align 4 +; CLEANUP-CPS-NEXT: [[TMP7:%.*]] = load i32, ptr getelementptr ([[STRUCT_MEDIUMPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 3), align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_EXTRACT13:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP3]], 0 +; CLEANUP-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; CLEANUP-CPS-NEXT: [[T17:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 +; CLEANUP-CPS-NEXT: [[T26:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[T17]]) +; CLEANUP-CPS-NEXT: [[T35:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[T26]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; CLEANUP-CPS-NEXT: [[TMP8:%.*]] = call i64 
@amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[T35]]) +; CLEANUP-CPS-NEXT: [[DIS_DATA_I5_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT13]], 0 +; CLEANUP-CPS-NEXT: [[SYS_DATA_I6:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I5_FCA_0_INSERT]], 0 +; CLEANUP-CPS-NEXT: [[TRAV_DATA_I7:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I6]], 0 +; CLEANUP-CPS-NEXT: [[TRAV_DATA2_I8:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I7]], i64 -1, 5 +; CLEANUP-CPS-NEXT: [[TMP9:%.*]] = ptrtoint ptr addrspace(32) [[PAYLOAD_SPILL_ALLOCA]] to i32 +; CLEANUP-CPS-NEXT: store i32 [[TMP9]], ptr @PAYLOAD, align 4 +; CLEANUP-CPS-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) +; CLEANUP-CPS-NEXT: store i32 0, ptr getelementptr inbounds ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 1), align 4 +; CLEANUP-CPS-NEXT: store i32 0, ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 2), align 4 +; CLEANUP-CPS-NEXT: store i32 0, ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 3), align 4 +; CLEANUP-CPS-NEXT: store i32 0, ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 4), align 4 +; CLEANUP-CPS-NEXT: store i32 0, ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 5), align 4 +; CLEANUP-CPS-NEXT: [[TMP10:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference(ptr @Miss.resume.2) +; CLEANUP-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 4, i32 4, {} poison, i32 [[TMP10]], i32 5), !continuation.returnedRegistercount !14, !continuation.registercount [[META14]] +; CLEANUP-CPS-NEXT: unreachable +; +; +; CLEANUP-CPS-LABEL: define dso_local void @Miss.resume.2( +; CLEANUP-CPS-SAME: {} [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP3:%.*]]) !lgc.rt.attribute.size [[META16]] !lgc.rt.shaderstage [[META17]] !lgc.cps [[META18]] !continuation [[META19]] !continuation.stacksize [[META20]] { +; CLEANUP-CPS-NEXT: entryresume.2: +; CLEANUP-CPS-NEXT: [[TMP4:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 20) +; CLEANUP-CPS-NEXT: [[PAYLOAD_SPILL_ALLOCA:%.*]] = getelementptr inbounds [[MISS_FRAME:%.*]], ptr addrspace(32) [[TMP4]], i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[TMP5:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 1), align 4 +; CLEANUP-CPS-NEXT: [[TMP6:%.*]] = load i32, ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 2), align 4 +; CLEANUP-CPS-NEXT: [[TMP7:%.*]] = load i32, ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 3), align 4 +; CLEANUP-CPS-NEXT: [[TMP8:%.*]] = load i32, ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 4), align 4 +; CLEANUP-CPS-NEXT: [[TMP9:%.*]] = load i32, ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 5), align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_EXTRACT15:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP3]], 0 +; CLEANUP-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; CLEANUP-CPS-NEXT: [[RETURN_ADDR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[MISS_FRAME]], ptr addrspace(32) [[TMP4]], i32 0, i32 1 +; CLEANUP-CPS-NEXT: [[RETURN_ADDR_RELOAD:%.*]] = 
load i32, ptr addrspace(32) [[RETURN_ADDR_RELOAD_ADDR]], align 4 +; CLEANUP-CPS-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [1 x i32] poison, i32 undef, 0 +; CLEANUP-CPS-NEXT: store [1 x i32] [[DOTFCA_0_INSERT]], ptr @PAYLOAD, align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT11:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT15]], 0 +; CLEANUP-CPS-NEXT: call void @lgc.cps.free(i32 20) +; CLEANUP-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR_RELOAD]], i32 6, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT11]]) +; CLEANUP-CPS-NEXT: unreachable +; +; +; CLEANUP-CPS-LABEL: define i32 @_cont_GetLocalRootIndex( +; CLEANUP-CPS-SAME: ptr [[DATA:%.*]]) #[[ATTR1:[0-9]+]] { +; CLEANUP-CPS-NEXT: ret i32 5 +; +; +; DXILCONTPOSTPROCESS-LABEL: define void @Miss( +; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.attribute.size [[META15:![0-9]+]] !lgc.rt.shaderstage [[META16:![0-9]+]] !continuation.registercount [[META17:![0-9]+]] !continuation [[META18:![0-9]+]] !continuation.stacksize [[META19:![0-9]+]] !continuation.state [[META20:![0-9]+]] { +; DXILCONTPOSTPROCESS-NEXT: AllocaSpillBB: +; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 16 +; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(21) +; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP4]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: store i64 [[RETURNADDR]], ptr addrspace(21) [[TMP5]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_EXTRACT:%.*]] = 
extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 0, 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [1 x i32] poison, i32 [[TMP6]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [1 x i32] [[DOTFCA_0_INSERT]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = add i32 [[TMP2]], 24 +; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = inttoptr i32 [[TMP7]] to ptr addrspace(21) +; DXILCONTPOSTPROCESS-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP8]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[DOTFCA_0_EXTRACT]], ptr addrspace(21) [[TMP9]], align 4 +; DXILCONTPOSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; DXILCONTPOSTPROCESS-NEXT: [[T1:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 +; DXILCONTPOSTPROCESS-NEXT: [[T2:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[T1]]) +; DXILCONTPOSTPROCESS-NEXT: [[T3:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[T2]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[T3]]) +; DXILCONTPOSTPROCESS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison, <3 x i32> [[DOTFCA_0_0_EXTRACT]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA2_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], i64 -1, 5 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT30:%.*]] = insertvalue [1 x i32] poison, i32 0, 0 +; 
DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT30_FCA_0_EXTRACT:%.*]] = extractvalue [1 x i32] [[DOTFCA_0_INSERT30]], 0 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[DOTFCA_0_INSERT30_FCA_0_EXTRACT]], ptr addrspace(20) @REGISTERS, align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 28 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP12]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP13:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP14:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @Miss.resume.0 to i64)) +; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 4, i32 [[TMP13]], i64 [[TMP14]], [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]]), !continuation.registercount [[META17]], !continuation.returnedRegistercount !17 +; DXILCONTPOSTPROCESS-NEXT: unreachable +; +; +; DXILCONTPOSTPROCESS-LABEL: define dso_local void @Miss.resume.0( +; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.attribute.size [[META15]] !lgc.rt.shaderstage [[META16]] !continuation.registercount [[META17]] !continuation [[META18]] { +; DXILCONTPOSTPROCESS-NEXT: entryresume.0: +; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], -28 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP2]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [1 x i32] poison, i32 [[TMP5]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT32:%.*]] 
= extractvalue [1 x i32] [[DOTFCA_0_INSERT]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT13:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 +; DXILCONTPOSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; DXILCONTPOSTPROCESS-NEXT: [[T110:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 +; DXILCONTPOSTPROCESS-NEXT: [[T29:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[T110]]) +; DXILCONTPOSTPROCESS-NEXT: [[T38:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[T29]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[T38]]) +; DXILCONTPOSTPROCESS-NEXT: [[DIS_DATA_I1_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT13]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[SYS_DATA_I2:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I1_FCA_0_INSERT]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA_I3:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I2]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA2_I4:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I3]], i64 -1, 5 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP4]], ptr addrspace(20) @REGISTERS, align 4 +; DXILCONTPOSTPROCESS-NEXT: store i32 0, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_MEDIUMPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 1) to ptr addrspace(20)), align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = add i32 [[TMP7]], -8 +; DXILCONTPOSTPROCESS-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 8 +; DXILCONTPOSTPROCESS-NEXT: 
[[TMP10:%.*]] = inttoptr i32 [[TMP9]] to ptr addrspace(21) +; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP10]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: store i32 0, ptr addrspace(21) [[TMP11]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP13:%.*]] = add i32 [[TMP12]], -8 +; DXILCONTPOSTPROCESS-NEXT: [[TMP14:%.*]] = add i32 [[TMP13]], 12 +; DXILCONTPOSTPROCESS-NEXT: [[TMP15:%.*]] = inttoptr i32 [[TMP14]] to ptr addrspace(21) +; DXILCONTPOSTPROCESS-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP15]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: store i32 0, ptr addrspace(21) [[TMP16]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP17:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP18:%.*]] = add i32 [[TMP17]], 28 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP18]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP19:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP20:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @Miss.resume.1 to i64)) +; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 4, i32 [[TMP19]], i64 [[TMP20]], [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I4]]), !continuation.registercount [[META13:![0-9]+]], !continuation.returnedRegistercount !13 +; DXILCONTPOSTPROCESS-NEXT: unreachable +; +; +; DXILCONTPOSTPROCESS-LABEL: define dso_local void @Miss.resume.1( +; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.attribute.size [[META15]] !lgc.rt.shaderstage [[META16]] !continuation.registercount [[META13]] !continuation [[META18]] { +; DXILCONTPOSTPROCESS-NEXT: entryresume.1: +; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], -28 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP2]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_MEDIUMPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 1) to ptr addrspace(20)), align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], -8 +; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = add i32 [[TMP7]], 8 +; DXILCONTPOSTPROCESS-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP8]] to ptr addrspace(21) +; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP9]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(21) [[TMP10]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP13:%.*]] = add 
i32 [[TMP12]], -8 +; DXILCONTPOSTPROCESS-NEXT: [[TMP14:%.*]] = add i32 [[TMP13]], 12 +; DXILCONTPOSTPROCESS-NEXT: [[TMP15:%.*]] = inttoptr i32 [[TMP14]] to ptr addrspace(21) +; DXILCONTPOSTPROCESS-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP15]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(21) [[TMP16]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT15:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 +; DXILCONTPOSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; DXILCONTPOSTPROCESS-NEXT: [[T17:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 +; DXILCONTPOSTPROCESS-NEXT: [[T26:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[T17]]) +; DXILCONTPOSTPROCESS-NEXT: [[T35:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[T26]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; DXILCONTPOSTPROCESS-NEXT: [[TMP18:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[T35]]) +; DXILCONTPOSTPROCESS-NEXT: [[DIS_DATA_I5_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT15]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[SYS_DATA_I6:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I5_FCA_0_INSERT]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA_I7:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I6]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA2_I8:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I7]], i64 -1, 5 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP4]], ptr addrspace(20) @REGISTERS, align 4 +; DXILCONTPOSTPROCESS-NEXT: store i32 0, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to 
ptr), i32 0, i32 0, i32 1) to ptr addrspace(20)), align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP20:%.*]] = add i32 [[TMP19]], -8 +; DXILCONTPOSTPROCESS-NEXT: [[TMP21:%.*]] = add i32 [[TMP20]], 8 +; DXILCONTPOSTPROCESS-NEXT: [[TMP22:%.*]] = inttoptr i32 [[TMP21]] to ptr addrspace(21) +; DXILCONTPOSTPROCESS-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP22]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: store i32 0, ptr addrspace(21) [[TMP23]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP25:%.*]] = add i32 [[TMP24]], -8 +; DXILCONTPOSTPROCESS-NEXT: [[TMP26:%.*]] = add i32 [[TMP25]], 12 +; DXILCONTPOSTPROCESS-NEXT: [[TMP27:%.*]] = inttoptr i32 [[TMP26]] to ptr addrspace(21) +; DXILCONTPOSTPROCESS-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP27]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: store i32 0, ptr addrspace(21) [[TMP28]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP29:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP30:%.*]] = add i32 [[TMP29]], -8 +; DXILCONTPOSTPROCESS-NEXT: [[TMP31:%.*]] = add i32 [[TMP30]], 16 +; DXILCONTPOSTPROCESS-NEXT: [[TMP32:%.*]] = inttoptr i32 [[TMP31]] to ptr addrspace(21) +; DXILCONTPOSTPROCESS-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP32]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: store i32 0, ptr addrspace(21) [[TMP33]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP34:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP35:%.*]] = add i32 [[TMP34]], -8 +; DXILCONTPOSTPROCESS-NEXT: [[TMP36:%.*]] = add i32 [[TMP35]], 20 +; DXILCONTPOSTPROCESS-NEXT: [[TMP37:%.*]] = inttoptr i32 [[TMP36]] to ptr addrspace(21) +; DXILCONTPOSTPROCESS-NEXT: [[TMP38:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP37]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: store i32 0, ptr 
addrspace(21) [[TMP38]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP39:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP40:%.*]] = add i32 [[TMP39]], 28 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP40]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP41:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP42:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @Miss.resume.2 to i64)) +; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 4, i32 [[TMP41]], i64 [[TMP42]], [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I8]]), !continuation.registercount [[META13]], !continuation.returnedRegistercount !13 +; DXILCONTPOSTPROCESS-NEXT: unreachable +; +; +; DXILCONTPOSTPROCESS-LABEL: define dso_local void @Miss.resume.2( +; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.attribute.size [[META15]] !lgc.rt.shaderstage [[META16]] !continuation.registercount [[META13]] !continuation [[META18]] { +; DXILCONTPOSTPROCESS-NEXT: entryresume.2: +; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], -28 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP2]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 1) to ptr addrspace(20)), align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = add i32 
[[TMP6]], -8 +; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = add i32 [[TMP7]], 8 +; DXILCONTPOSTPROCESS-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP8]] to ptr addrspace(21) +; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP9]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(21) [[TMP10]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP13:%.*]] = add i32 [[TMP12]], -8 +; DXILCONTPOSTPROCESS-NEXT: [[TMP14:%.*]] = add i32 [[TMP13]], 12 +; DXILCONTPOSTPROCESS-NEXT: [[TMP15:%.*]] = inttoptr i32 [[TMP14]] to ptr addrspace(21) +; DXILCONTPOSTPROCESS-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP15]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(21) [[TMP16]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP19:%.*]] = add i32 [[TMP18]], -8 +; DXILCONTPOSTPROCESS-NEXT: [[TMP20:%.*]] = add i32 [[TMP19]], 16 +; DXILCONTPOSTPROCESS-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP20]] to ptr addrspace(21) +; DXILCONTPOSTPROCESS-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP21]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(21) [[TMP22]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP25:%.*]] = add i32 [[TMP24]], -8 +; DXILCONTPOSTPROCESS-NEXT: [[TMP26:%.*]] = add i32 [[TMP25]], 20 +; DXILCONTPOSTPROCESS-NEXT: [[TMP27:%.*]] = inttoptr i32 [[TMP26]] to ptr addrspace(21) +; DXILCONTPOSTPROCESS-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP27]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP29:%.*]] = load i32, ptr addrspace(21) [[TMP28]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT17:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 +; 
DXILCONTPOSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; DXILCONTPOSTPROCESS-NEXT: [[TMP30:%.*]] = add i32 [[TMP4]], 24 +; DXILCONTPOSTPROCESS-NEXT: [[TMP31:%.*]] = inttoptr i32 [[TMP30]] to ptr addrspace(21) +; DXILCONTPOSTPROCESS-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP31]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT_RELOAD:%.*]] = load i32, ptr addrspace(21) [[TMP32]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP33:%.*]] = add i32 [[TMP4]], 16 +; DXILCONTPOSTPROCESS-NEXT: [[TMP34:%.*]] = inttoptr i32 [[TMP33]] to ptr addrspace(21) +; DXILCONTPOSTPROCESS-NEXT: [[TMP35:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP34]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(21) [[TMP35]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [1 x i32] poison, i32 [[DOTFCA_0_EXTRACT_RELOAD]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [1 x i32] [[DOTFCA_0_INSERT]], 0 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[DOTFCA_0_INSERT_FCA_0_EXTRACT]], ptr addrspace(20) @REGISTERS, align 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT12:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT17]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP36:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP36]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT12]]), !continuation.registercount [[META17]] +; DXILCONTPOSTPROCESS-NEXT: unreachable +; +; +; DXILCONTPOSTPROCESS-LABEL: define i32 @_cont_GetLocalRootIndex( +; DXILCONTPOSTPROCESS-SAME: ptr [[DATA:%.*]]) #[[ATTR1:[0-9]+]] { +; DXILCONTPOSTPROCESS-NEXT: ret i32 5 +; diff --git a/llvmraytracing/test/dx/lower-rt-pipeline-simple-call-shader.ll b/llvmraytracing/test/dx/lower-rt-pipeline-simple-call-shader.ll new file mode 100644 index 0000000000..4e770e4ae2 --- /dev/null +++ b/llvmraytracing/test/dx/lower-rt-pipeline-simple-call-shader.ll @@ -0,0 +1,475 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 3 +; RUN: grep -v lgc.cps.module %s | opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,remove-types-metadata' -S 2> %t0.stderr | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE %s +; RUN: count 0 < %t0.stderr +; RUN: grep -v lgc.cps.module %s | opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,remove-types-metadata' \ +; RUN: -S 2> %t1.stderr | FileCheck -check-prefix=CLEANUP %s +; RUN: count 0 < %t1.stderr +; RUN: grep -v lgc.cps.module %s | opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,remove-types-metadata' \ +; RUN: -S 2> %t2.stderr | FileCheck -check-prefix=REGISTERBUFFER %s +; RUN: count 0 < %t2.stderr +; RUN: grep -v lgc.cps.module %s | opt --verify-each 
-passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' \ +; RUN: -S 2> %t3.stderr | FileCheck -check-prefix=POSTPROCESS %s +; RUN: count 0 < %t3.stderr +; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,remove-types-metadata' -S %s 2> %t4.stderr | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE-CPS %s +; RUN: count 0 < %t4.stderr +; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,cleanup-continuations,lint,remove-types-metadata' \ +; RUN: -S %s 2> %t5.stderr | FileCheck -check-prefix=CLEANUP-CPS %s +; RUN: count 0 < %t5.stderr +; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,cleanup-continuations,lint,register-buffer,lint,remove-types-metadata' \ +; RUN: -S %s 2> %t6.stderr | FileCheck -check-prefix=REGISTERBUFFER-CPS %s +; RUN: count 0 < %t6.stderr +; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' \ +; RUN: -S %s 2> %t7.stderr | FileCheck -check-prefix=POSTPROCESS-CPS %s +; RUN: count 0 < %t7.stderr + +target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" + 
+%dx.types.Handle = type { i8* } +%struct.DispatchSystemData = type { i32 } +%struct.TraversalData = type { %struct.SystemData } +%struct.SystemData = type { %struct.DispatchSystemData } +%struct.BuiltInTriangleIntersectionAttributes = type { <2 x float> } +%struct.MyParams = type { i32 } +%"class.RWTexture2D >" = type { <4 x float> } + +@"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A" = external constant %dx.types.Handle, align 4 + +declare i32 @_cont_GetContinuationStackAddr() + +declare %struct.DispatchSystemData @_cont_SetupRayGen() + +declare %struct.DispatchSystemData @_AmdAwaitTraversal(i64, %struct.TraversalData) + +declare %struct.DispatchSystemData @_AmdAwaitShader(i64, %struct.DispatchSystemData) + +declare !types !13 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData*) + +declare !types !15 void @_AmdRestoreSystemData(%struct.DispatchSystemData*) + +define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) !types !17 { + ret i32 5 +} + +; Function Attrs: nounwind memory(none) +declare !types !22 <3 x i32> @_cont_DispatchRaysIndex3(%struct.DispatchSystemData* nocapture readnone %data) #1 + +; Function Attrs: nounwind memory(none) +declare !types !22 <3 x i32> @_cont_DispatchRaysDimensions3(%struct.DispatchSystemData* nocapture readnone %data) #1 + +define void @_cont_CallShader(%struct.DispatchSystemData* %data, i32 %0) #1 !types !18 { + %dis_data = load %struct.DispatchSystemData, %struct.DispatchSystemData* %data, align 4 + %newdata = call %struct.DispatchSystemData @_AmdAwaitShader(i64 2, %struct.DispatchSystemData %dis_data) + store %struct.DispatchSystemData %newdata, %struct.DispatchSystemData* %data, align 4 + call void @_AmdRestoreSystemData(%struct.DispatchSystemData* %data) + ret void +} + +define void @called(%struct.MyParams* %params) !types !19 { + call void @dx.op.callShader.struct.MyParams(i32 159, i32 2, %struct.MyParams* nonnull %params) + %a = call i32 
@dx.op.dispatchRaysIndex.i32(i32 145, i8 0) + %b = call i32 @dx.op.dispatchRaysDimensions.i32(i32 146, i8 0) + ret void +} + +; Function Attrs: nounwind +declare !types !21 void @dx.op.callShader.struct.MyParams(i32, i32, %struct.MyParams*) #0 + +; Function Attrs: nounwind memory(none) +declare i32 @dx.op.dispatchRaysDimensions.i32(i32, i8) #1 + +; Function Attrs: nounwind memory(none) +declare i32 @dx.op.dispatchRaysIndex.i32(i32, i8) #1 +attributes #0 = { nounwind } +attributes #1 = { alwaysinline } + +!llvm.ident = !{!0} +!dx.version = !{!1} +!dx.valver = !{!1} +!dx.shaderModel = !{!2} +!dx.entryPoints = !{!3, !6} +!lgc.cps.module = !{} + +!0 = !{!"clang version 3.7.0 (tags/RELEASE_370/final)"} +!1 = !{i32 1, i32 6} +!2 = !{!"lib", i32 6, i32 6} +!3 = !{null, !"", null, !4, !12} +!4 = !{!5, !9, null, null} +!5 = !{!6} +!6 = !{void (%struct.MyParams*)* @called, !"called", null, null, !7} +!7 = !{i32 8, i32 12, i32 6, i32 16, i32 7, i32 8, i32 5, !8} +!8 = !{i32 0} +!9 = !{!10} +!10 = !{i32 0, %"class.RWTexture2D >"* bitcast (%dx.types.Handle* @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A" to %"class.RWTexture2D >"*), !"RenderTarget", i32 0, i32 0, i32 1, i32 2, i1 false, i1 false, i1 false, !11} +!11 = !{i32 0, i32 9} +!12 = !{i32 0, i64 65536} +!13 = !{!"function", %struct.BuiltInTriangleIntersectionAttributes poison, !14} +!14 = !{i32 0, %struct.SystemData poison} +!15 = !{!"function", !"void", !16} +!16 = !{i32 0, %struct.DispatchSystemData poison} +!17 = !{!"function", i32 poison, !16} +!18 = !{!"function", !"void", !16, i32 poison} +!19 = !{!"function", !"void", !20} +!20 = !{i32 0, %struct.MyParams poison} +!21 = !{!"function", !"void", i32 poison, i32 poison, !20} +!22 = !{!"function", <3 x i32> poison, !16} + +; LOWERRAYTRACINGPIPELINE-LABEL: define i32 @_cont_GetLocalRootIndex( +; LOWERRAYTRACINGPIPELINE-SAME: ptr [[DATA:%.*]]) { +; LOWERRAYTRACINGPIPELINE-NEXT: ret i32 5 +; +; +; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.DispatchSystemData 
@called( +; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META17:![0-9]+]] !continuation.registercount [[META15:![0-9]+]] !continuation [[META18:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_MYPARAMS:%.*]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_MYPARAMS]], ptr [[TMP2]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = load i32, ptr @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) +; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; LOWERRAYTRACINGPIPELINE-NEXT: [[DIS_DATA_I:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_MYPARAMS]], ptr [[TMP2]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP6]], ptr @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = call ptr inttoptr (i64 2 to ptr)([[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I]]), !continuation.registercount [[META15]], !continuation.returnedRegistercount !15 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] @await.struct.DispatchSystemData(ptr [[TMP7]]) +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_MYPARAMS]] poison, ptr [[TMP2]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_MYPARAMS]], ptr [[TMP2]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = load i32, ptr @PAYLOAD, align 4 +; 
LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP10]], ptr [[TMP9]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP8]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; LOWERRAYTRACINGPIPELINE-NEXT: br label [[DOTSPLIT:%.*]] +; LOWERRAYTRACINGPIPELINE: .split: +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.index() +; LOWERRAYTRACINGPIPELINE-NEXT: [[A:%.*]] = extractelement <3 x i32> [[TMP11]], i8 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.dimensions() +; LOWERRAYTRACINGPIPELINE-NEXT: [[B:%.*]] = extractelement <3 x i32> [[TMP12]], i8 0 +; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_MYPARAMS]], ptr [[TMP2]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP14]], ptr @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP15]], !continuation.registercount [[META15]] +; +; +; CLEANUP-LABEL: define i32 @_cont_GetLocalRootIndex( +; CLEANUP-SAME: ptr [[DATA:%.*]]) { +; CLEANUP-NEXT: ret i32 5 +; +; +; CLEANUP-LABEL: define void @called( +; CLEANUP-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META17:![0-9]+]] !continuation.registercount [[META15:![0-9]+]] !continuation [[META18:![0-9]+]] !continuation.stacksize [[META19:![0-9]+]] !continuation.state [[META19]] { +; CLEANUP-NEXT: AllocaSpillBB: +; CLEANUP-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) +; CLEANUP-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[CALLED_FRAME:%.*]], ptr 
addrspace(32) [[TMP1]], i32 0, i32 0 +; CLEANUP-NEXT: store i64 [[RETURNADDR]], ptr addrspace(32) [[RETURNADDR_SPILL_ADDR]], align 4 +; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 +; CLEANUP-NEXT: [[TMP2:%.*]] = load i32, ptr @PAYLOAD, align 4 +; CLEANUP-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) +; CLEANUP-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; CLEANUP-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT]], 0 +; CLEANUP-NEXT: store i32 [[TMP2]], ptr @PAYLOAD, align 4 +; CLEANUP-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) +; CLEANUP-NEXT: call void (i64, ...) @continuation.continue(i64 2, i64 ptrtoint (ptr @called.resume.0 to i64), [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]]), !continuation.registercount [[META15]], !continuation.returnedRegistercount !15 +; CLEANUP-NEXT: unreachable +; +; +; CLEANUP-LABEL: define dso_local void @called.resume.0( +; CLEANUP-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META17]] !continuation.registercount [[META15]] !continuation [[META18]] { +; CLEANUP-NEXT: entryresume.0: +; CLEANUP-NEXT: call void @lgc.cps.free(i32 8) +; CLEANUP-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) +; CLEANUP-NEXT: [[TMP2:%.*]] = load i32, ptr @PAYLOAD, align 4 +; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT3:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 +; CLEANUP-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; CLEANUP-NEXT: [[RETURNADDR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[CALLED_FRAME:%.*]], ptr addrspace(32) [[TMP1]], i32 0, i32 0 +; CLEANUP-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(32) [[RETURNADDR_RELOAD_ADDR]], align 4 +; CLEANUP-NEXT: [[TMP3:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.index() +; CLEANUP-NEXT: [[A:%.*]] = extractelement <3 x i32> [[TMP3]], i8 0 
+; CLEANUP-NEXT: [[TMP4:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.dimensions() +; CLEANUP-NEXT: [[B:%.*]] = extractelement <3 x i32> [[TMP4]], i8 0 +; CLEANUP-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) +; CLEANUP-NEXT: store i32 [[TMP2]], ptr @PAYLOAD, align 4 +; CLEANUP-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT3]], 0 +; CLEANUP-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR_RELOAD]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META15]] +; CLEANUP-NEXT: unreachable +; +; +; REGISTERBUFFER-LABEL: define i32 @_cont_GetLocalRootIndex( +; REGISTERBUFFER-SAME: ptr [[DATA:%.*]]) { +; REGISTERBUFFER-NEXT: ret i32 5 +; +; +; REGISTERBUFFER-LABEL: define void @called( +; REGISTERBUFFER-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META16:![0-9]+]] !continuation.registercount [[META14:![0-9]+]] !continuation [[META17:![0-9]+]] !continuation.stacksize [[META18:![0-9]+]] !continuation.state [[META18]] { +; REGISTERBUFFER-NEXT: AllocaSpillBB: +; REGISTERBUFFER-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) +; REGISTERBUFFER-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[CALLED_FRAME:%.*]], ptr addrspace(32) [[TMP1]], i32 0, i32 0 +; REGISTERBUFFER-NEXT: store i64 [[RETURNADDR]], ptr addrspace(32) [[RETURNADDR_SPILL_ADDR]], align 4 +; REGISTERBUFFER-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 +; REGISTERBUFFER-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; REGISTERBUFFER-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr)) +; REGISTERBUFFER-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; REGISTERBUFFER-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT]], 0 +; REGISTERBUFFER-NEXT: store i32 [[TMP2]], ptr addrspace(20) @PAYLOAD, align 4 +; REGISTERBUFFER-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) +; REGISTERBUFFER-NEXT: call void (i64, ...) @continuation.continue(i64 2, i64 ptrtoint (ptr @called.resume.0 to i64), [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]]), !continuation.registercount [[META14]], !continuation.returnedRegistercount !14 +; REGISTERBUFFER-NEXT: unreachable +; +; +; REGISTERBUFFER-LABEL: define dso_local void @called.resume.0( +; REGISTERBUFFER-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META16]] !continuation.registercount [[META14]] !continuation [[META17]] { +; REGISTERBUFFER-NEXT: entryresume.0: +; REGISTERBUFFER-NEXT: call void @lgc.cps.free(i32 8) +; REGISTERBUFFER-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) +; REGISTERBUFFER-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; REGISTERBUFFER-NEXT: [[DOTFCA_0_EXTRACT3:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 +; REGISTERBUFFER-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; REGISTERBUFFER-NEXT: [[RETURNADDR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[CALLED_FRAME:%.*]], ptr addrspace(32) [[TMP1]], i32 0, i32 0 +; REGISTERBUFFER-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(32) [[RETURNADDR_RELOAD_ADDR]], align 4 +; REGISTERBUFFER-NEXT: [[TMP3:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.index() +; REGISTERBUFFER-NEXT: [[A:%.*]] = extractelement <3 x i32> [[TMP3]], i8 0 +; REGISTERBUFFER-NEXT: [[TMP4:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.dimensions() +; REGISTERBUFFER-NEXT: [[B:%.*]] = 
extractelement <3 x i32> [[TMP4]], i8 0 +; REGISTERBUFFER-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr)) +; REGISTERBUFFER-NEXT: store i32 [[TMP2]], ptr addrspace(20) @PAYLOAD, align 4 +; REGISTERBUFFER-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT3]], 0 +; REGISTERBUFFER-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR_RELOAD]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META14]] +; REGISTERBUFFER-NEXT: unreachable +; +; +; POSTPROCESS-LABEL: define i32 @_cont_GetLocalRootIndex( +; POSTPROCESS-SAME: ptr [[DATA:%.*]]) { +; POSTPROCESS-NEXT: ret i32 5 +; +; +; POSTPROCESS-LABEL: define void @called( +; POSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META16:![0-9]+]] !continuation.registercount [[META14:![0-9]+]] !continuation [[META17:![0-9]+]] !continuation.stacksize [[META18:![0-9]+]] !continuation.state [[META18]] { +; POSTPROCESS-NEXT: AllocaSpillBB: +; POSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; POSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 0 +; POSTPROCESS-NEXT: [[TMP3:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(21) +; POSTPROCESS-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP3]], i32 0 +; POSTPROCESS-NEXT: store i64 [[RETURNADDR]], ptr addrspace(21) [[TMP4]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 +; POSTPROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; POSTPROCESS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 
[[DOTFCA_0_EXTRACT]], 0 +; POSTPROCESS-NEXT: store i32 [[TMP5]], ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-NEXT: [[TMP6:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], 8 +; POSTPROCESS-NEXT: store i32 [[TMP7]], ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[TMP8:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[TMP9:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @called.resume.0 to i64)) +; POSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 2, i32 [[TMP8]], i64 [[TMP9]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]]), !continuation.registercount [[META14]], !continuation.returnedRegistercount !14 +; POSTPROCESS-NEXT: unreachable +; +; +; POSTPROCESS-LABEL: define dso_local void @called.resume.0( +; POSTPROCESS-SAME: i32 [[CSPINIT:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META16]] !continuation.registercount [[META14]] !continuation [[META17]] { +; POSTPROCESS-NEXT: entryresume.0: +; POSTPROCESS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 +; POSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; POSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; POSTPROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], -8 +; POSTPROCESS-NEXT: store i32 [[TMP2]], ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 0 +; POSTPROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT3:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 +; POSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; POSTPROCESS-NEXT: [[TMP6:%.*]] = inttoptr i32 [[TMP4]] to ptr addrspace(21) +; 
POSTPROCESS-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP6]], i32 0 +; POSTPROCESS-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(21) [[TMP7]], align 4 +; POSTPROCESS-NEXT: [[TMP8:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[SYSTEM_DATA_ALLOCA]]) +; POSTPROCESS-NEXT: [[A:%.*]] = extractelement <3 x i32> [[TMP8]], i8 0 +; POSTPROCESS-NEXT: [[TMP9:%.*]] = call <3 x i32> @_cont_DispatchRaysDimensions3(ptr [[SYSTEM_DATA_ALLOCA]]) +; POSTPROCESS-NEXT: [[B:%.*]] = extractelement <3 x i32> [[TMP9]], i8 0 +; POSTPROCESS-NEXT: store i32 [[TMP5]], ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT3]], 0 +; POSTPROCESS-NEXT: [[TMP10:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP10]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META14]] +; POSTPROCESS-NEXT: unreachable +; +; +; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define i32 @_cont_GetLocalRootIndex( +; LOWERRAYTRACINGPIPELINE-CPS-SAME: ptr [[DATA:%.*]]) { +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: ret i32 5 +; +; +; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define void @called( +; LOWERRAYTRACINGPIPELINE-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META17:![0-9]+]] !lgc.cps [[META15:![0-9]+]] !continuation [[META18:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_MYPARAMS:%.*]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) +; 
LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DIS_DATA_I:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_MYPARAMS]], ptr [[TMP2]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP4]], ptr @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP5:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] (...) @lgc.cps.await__s_struct.DispatchSystemDatas(i32 2, i32 2, i32 5), !continuation.returnedRegistercount !15, !continuation.registercount [[META15]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_MYPARAMS]] poison, ptr [[TMP2]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_MYPARAMS]], ptr [[TMP2]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP7:%.*]] = load i32, ptr @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP5]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br label [[DOTSPLIT:%.*]] +; LOWERRAYTRACINGPIPELINE-CPS: .split: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP8:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.index() +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[A:%.*]] = extractelement <3 x i32> [[TMP8]], i8 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP9:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.dimensions() +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[B:%.*]] = extractelement <3 x i32> [[TMP9]], i8 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_MYPARAMS]], ptr [[TMP2]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP11]], ptr @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP12:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 2, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP12]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable +; +; +; CLEANUP-CPS-LABEL: define i32 @_cont_GetLocalRootIndex( +; CLEANUP-CPS-SAME: ptr [[DATA:%.*]]) { +; CLEANUP-CPS-NEXT: ret i32 5 +; +; +; CLEANUP-CPS-LABEL: define void @called( +; CLEANUP-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META17:![0-9]+]] !lgc.cps [[META15:![0-9]+]] !continuation [[META18:![0-9]+]] { +; CLEANUP-CPS-NEXT: AllocaSpillBB: +; CLEANUP-CPS-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) +; CLEANUP-CPS-NEXT: [[RETURN_ADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[CALLED_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 +; CLEANUP-CPS-NEXT: store i32 [[RETURN_ADDR]], ptr addrspace(32) [[RETURN_ADDR_SPILL_ADDR]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 +; CLEANUP-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) +; CLEANUP-CPS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT]], 0 +; CLEANUP-CPS-NEXT: store i32 undef, ptr @PAYLOAD, align 4 +; CLEANUP-CPS-NEXT: [[TMP1:%.*]] = call i32 (...) 
@lgc.cps.as.continuation.reference(ptr @called.resume.0) +; CLEANUP-CPS-NEXT: call void (...) @lgc.cps.jump(i32 2, i32 2, {} poison, i32 [[TMP1]], i32 5), !continuation.returnedRegistercount !15, !continuation.registercount [[META15]] +; CLEANUP-CPS-NEXT: unreachable +; +; +; CLEANUP-CPS-LABEL: define dso_local void @called.resume.0( +; CLEANUP-CPS-SAME: {} [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP3:%.*]]) !lgc.rt.shaderstage [[META17]] !lgc.cps [[META15]] !continuation [[META18]] { +; CLEANUP-CPS-NEXT: entryresume.0: +; CLEANUP-CPS-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 +; CLEANUP-CPS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP3]], ptr [[TMP4]], align 4 +; CLEANUP-CPS-NEXT: [[TMP5:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 8) +; CLEANUP-CPS-NEXT: [[TMP6:%.*]] = load i32, ptr @PAYLOAD, align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_EXTRACT3:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP3]], 0 +; CLEANUP-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; CLEANUP-CPS-NEXT: [[RETURN_ADDR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[CALLED_FRAME:%.*]], ptr addrspace(32) [[TMP5]], i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[RETURN_ADDR_RELOAD:%.*]] = load i32, ptr addrspace(32) [[RETURN_ADDR_RELOAD_ADDR]], align 4 +; CLEANUP-CPS-NEXT: [[TMP7:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[TMP4]]) +; CLEANUP-CPS-NEXT: [[A:%.*]] = extractelement <3 x i32> [[TMP7]], i8 0 +; CLEANUP-CPS-NEXT: [[TMP8:%.*]] = call <3 x i32> @_cont_DispatchRaysDimensions3(ptr [[TMP4]]) +; CLEANUP-CPS-NEXT: [[B:%.*]] = extractelement <3 x i32> [[TMP8]], i8 0 +; CLEANUP-CPS-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) +; CLEANUP-CPS-NEXT: store i32 [[TMP6]], ptr @PAYLOAD, align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT3]], 0 +; CLEANUP-CPS-NEXT: call void @lgc.cps.free(i32 8) +; CLEANUP-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR_RELOAD]], i32 2, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]) +; CLEANUP-CPS-NEXT: unreachable +; +; +; REGISTERBUFFER-CPS-LABEL: define i32 @_cont_GetLocalRootIndex( +; REGISTERBUFFER-CPS-SAME: ptr [[DATA:%.*]]) #[[ATTR0:[0-9]+]] { +; REGISTERBUFFER-CPS-NEXT: ret i32 5 +; +; +; REGISTERBUFFER-CPS-LABEL: define void @called( +; REGISTERBUFFER-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !continuation [[META16:![0-9]+]] !lgc.rt.shaderstage [[META17:![0-9]+]] !lgc.cps [[META14:![0-9]+]] { +; REGISTERBUFFER-CPS-NEXT: AllocaSpillBB: +; REGISTERBUFFER-CPS-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) +; REGISTERBUFFER-CPS-NEXT: [[RETURN_ADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[CALLED_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: store i32 [[RETURN_ADDR]], ptr addrspace(32) [[RETURN_ADDR_SPILL_ADDR]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 +; REGISTERBUFFER-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) +; REGISTERBUFFER-CPS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT]], 0 +; REGISTERBUFFER-CPS-NEXT: store i32 undef, ptr addrspace(20) @PAYLOAD, align 4 +; REGISTERBUFFER-CPS-NEXT: [[TMP1:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference(ptr @called.resume.0) +; REGISTERBUFFER-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 2, i32 2, {} poison, i32 [[TMP1]], i32 5), !continuation.returnedRegistercount !14, !continuation.registercount [[META14]] +; REGISTERBUFFER-CPS-NEXT: unreachable +; +; +; REGISTERBUFFER-CPS-LABEL: define dso_local void @called.resume.0( +; REGISTERBUFFER-CPS-SAME: {} [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP3:%.*]]) !continuation [[META16]] !lgc.rt.shaderstage [[META17]] !lgc.cps [[META14]] { +; REGISTERBUFFER-CPS-NEXT: entryresume.0: +; REGISTERBUFFER-CPS-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 +; REGISTERBUFFER-CPS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP3]], ptr [[TMP4]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[TMP5:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 8) +; REGISTERBUFFER-CPS-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT3:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP3]], 0 +; REGISTERBUFFER-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; REGISTERBUFFER-CPS-NEXT: [[RETURN_ADDR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[CALLED_FRAME:%.*]], ptr addrspace(32) [[TMP5]], i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[RETURN_ADDR_RELOAD:%.*]] = load i32, ptr addrspace(32) [[RETURN_ADDR_RELOAD_ADDR]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[TMP7:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[TMP4]]) +; REGISTERBUFFER-CPS-NEXT: [[A:%.*]] = extractelement <3 x i32> [[TMP7]], i8 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP8:%.*]] = call <3 x i32> @_cont_DispatchRaysDimensions3(ptr [[TMP4]]) +; REGISTERBUFFER-CPS-NEXT: [[B:%.*]] = extractelement <3 x i32> [[TMP8]], i8 0 +; REGISTERBUFFER-CPS-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr)) +; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP6]], ptr addrspace(20) @PAYLOAD, align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT3]], 0 +; REGISTERBUFFER-CPS-NEXT: call void @lgc.cps.free(i32 8) +; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR_RELOAD]], i32 2, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]) +; REGISTERBUFFER-CPS-NEXT: unreachable +; +; +; POSTPROCESS-CPS-LABEL: define i32 @_cont_GetLocalRootIndex( +; POSTPROCESS-CPS-SAME: ptr [[DATA:%.*]]) #[[ATTR0:[0-9]+]] { +; POSTPROCESS-CPS-NEXT: ret i32 5 +; +; +; POSTPROCESS-CPS-LABEL: define void @called( +; POSTPROCESS-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[CSPINIT:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !continuation [[META16:![0-9]+]] !lgc.rt.shaderstage [[META17:![0-9]+]] !lgc.cps [[META14:![0-9]+]] { +; POSTPROCESS-CPS-NEXT: AllocaSpillBB: +; POSTPROCESS-CPS-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; POSTPROCESS-CPS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 8 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP2]], ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP3:%.*]] = inttoptr i32 [[TMP1]] to ptr addrspace(21) +; POSTPROCESS-CPS-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP3]], i32 0 +; POSTPROCESS-CPS-NEXT: store i32 [[RETURN_ADDR]], ptr addrspace(21) [[TMP4]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 +; POSTPROCESS-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) +; POSTPROCESS-CPS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] 
poison, i32 [[DOTFCA_0_EXTRACT]], 0 +; POSTPROCESS-CPS-NEXT: store i32 undef, ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-CPS-NEXT: [[TMP5:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference(ptr @called.resume.0) +; POSTPROCESS-CPS-NEXT: call void (...) @lgc.cps.jump(i32 2, i32 2, {} poison, i32 [[TMP5]], i32 5), !continuation.returnedRegistercount !14, !continuation.registercount [[META14]] +; POSTPROCESS-CPS-NEXT: unreachable +; +; +; POSTPROCESS-CPS-LABEL: define dso_local void @called.resume.0( +; POSTPROCESS-CPS-SAME: {} [[TMP0:%.*]], i32 [[CSPINIT:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP3:%.*]]) !continuation [[META16]] !lgc.rt.shaderstage [[META17]] !lgc.cps [[META14]] { +; POSTPROCESS-CPS-NEXT: entryresume.0: +; POSTPROCESS-CPS-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 +; POSTPROCESS-CPS-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; POSTPROCESS-CPS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP3]], ptr [[TMP4]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP5:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], -8 +; POSTPROCESS-CPS-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT3:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP3]], 0 +; POSTPROCESS-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; POSTPROCESS-CPS-NEXT: [[TMP8:%.*]] = inttoptr i32 [[TMP6]] to ptr addrspace(21) +; POSTPROCESS-CPS-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP8]], i32 0 +; POSTPROCESS-CPS-NEXT: [[RETURN_ADDR_RELOAD:%.*]] = load i32, ptr addrspace(21) [[TMP9]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP10:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[TMP4]]) +; POSTPROCESS-CPS-NEXT: [[A:%.*]] = extractelement <3 x i32> [[TMP10]], i8 0 +; POSTPROCESS-CPS-NEXT: [[TMP11:%.*]] = call <3 x i32> 
@_cont_DispatchRaysDimensions3(ptr [[TMP4]]) +; POSTPROCESS-CPS-NEXT: [[B:%.*]] = extractelement <3 x i32> [[TMP11]], i8 0 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP7]], ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT3]], 0 +; POSTPROCESS-CPS-NEXT: [[TMP12:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP13:%.*]] = add i32 [[TMP12]], -8 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP13]], ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR_RELOAD]], i32 2, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]) +; POSTPROCESS-CPS-NEXT: unreachable +; diff --git a/shared/continuations/test/dx/lower-rt-pipeline-small-payload-field.ll b/llvmraytracing/test/dx/lower-rt-pipeline-small-payload-field.ll similarity index 97% rename from shared/continuations/test/dx/lower-rt-pipeline-small-payload-field.ll rename to llvmraytracing/test/dx/lower-rt-pipeline-small-payload-field.ll index 2f0edc058a..c03281c76b 100644 --- a/shared/continuations/test/dx/lower-rt-pipeline-small-payload-field.ll +++ b/llvmraytracing/test/dx/lower-rt-pipeline-small-payload-field.ll @@ -5,7 +5,7 @@ ; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,remove-types-metadata' -S %s 2> %t.stderr | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE %s ; RUN: count 0 < %t.stderr -target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" +target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" ; The 
last two fields are relevant. The i16 needs special treatment, ; as well as the last two bytes of the <3 x i16>. diff --git a/shared/continuations/test/dx/lower-rt-pipeline-small-payload-field.ll.hlsl b/llvmraytracing/test/dx/lower-rt-pipeline-small-payload-field.ll.hlsl similarity index 100% rename from shared/continuations/test/dx/lower-rt-pipeline-small-payload-field.ll.hlsl rename to llvmraytracing/test/dx/lower-rt-pipeline-small-payload-field.ll.hlsl diff --git a/llvmraytracing/test/dx/lower-rt-pipeline.ll b/llvmraytracing/test/dx/lower-rt-pipeline.ll new file mode 100644 index 0000000000..cd4f278f45 --- /dev/null +++ b/llvmraytracing/test/dx/lower-rt-pipeline.ll @@ -0,0 +1,4793 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 3 +; RUN: grep -v lgc.cps.module %s | opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,remove-types-metadata' -S 2> %t0.stderr | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE %s +; RUN: count 0 < %t0.stderr +; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,remove-types-metadata' -S %s 2> %t1.stderr | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE-CPS %s +; RUN: count 0 < %t1.stderr +; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,cleanup-continuations,lint,register-buffer,lint,remove-types-metadata' -S %s 2> %t2.stderr | FileCheck -check-prefix=REGISTERBUFFER-CPS %s +; RUN: count 0 < %t2.stderr +; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' -S %s 2> %t3.stderr | FileCheck -check-prefix=POSTPROCESS-CPS 
%s +; RUN: count 0 < %t3.stderr +; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata,sroa' -S %s 2> %t4.stderr | FileCheck -check-prefix=SROA-CPS %s +; RUN: count 0 < %t4.stderr + +target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" + +%dx.types.Handle = type { i8* } +%struct.DispatchSystemData = type { <3 x i32> } +%struct.TraversalData = type { %struct.SystemData, %struct.HitData, <3 x float>, <3 x float>, float, i64 } +%struct.SystemData = type { %struct.DispatchSystemData } +%struct.HitData = type { <3 x float>, <3 x float>, float, i32 } +%struct.AnyHitTraversalData = type { %struct.TraversalData, %struct.HitData } +%struct.BuiltInTriangleIntersectionAttributes = type { <2 x float> } +%struct.RayPayload = type { <4 x float> } +%dx.types.ResourceProperties = type { i32, i32 } +%struct.BuiltInTriangleIntersectionAttributes2 = type { <2 x float> } +%struct.RaytracingAccelerationStructure = type { i32 } +%"class.RWTexture2D >" = type { <4 x float> } + +@"\01?Scene@@3URaytracingAccelerationStructure@@A" = external constant %dx.types.Handle, align 4 +@"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A" = external constant %dx.types.Handle, align 4 + +define i32 @_cont_GetContinuationStackAddr() #0 { + ret i32 0 +} + +declare %struct.DispatchSystemData @_cont_SetupRayGen() #0 + +declare %struct.DispatchSystemData @_AmdAwaitTraversal(i64, %struct.TraversalData) #0 + +declare %struct.DispatchSystemData @_AmdAwaitShader(i64, %struct.DispatchSystemData) #0 + +declare %struct.AnyHitTraversalData @_AmdAwaitAnyHit(i64, %struct.AnyHitTraversalData, 
float, i32) #0 + +define %struct.HitData @_cont_GetCandidateState(%struct.AnyHitTraversalData* %data) #0 !types !32 { + %resPtr = getelementptr %struct.AnyHitTraversalData, %struct.AnyHitTraversalData* %data, i32 0, i32 0 + %res = load %struct.HitData, %struct.HitData* %resPtr, align 4 + ret %struct.HitData %res +} + +declare !types !34 %struct.HitData @_cont_GetCommittedState(%struct.SystemData*) #0 + +declare !types !36 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData*) #0 + +define void @_cont_SetTriangleHitAttributes(%struct.SystemData* %data, %struct.BuiltInTriangleIntersectionAttributes %val) !types !37 { + %addr = getelementptr %struct.SystemData, %struct.SystemData* %data, i32 0, i32 0 + store %struct.BuiltInTriangleIntersectionAttributes %val, %struct.BuiltInTriangleIntersectionAttributes* %addr, align 4 + ret void +} + +define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) !types !38 { + ret i32 5 +} + +declare i1 @opaqueIsEnd() + +define i1 @_cont_IsEndSearch(%struct.TraversalData*) #0 !types !40 { + %isEnd = call i1 @opaqueIsEnd() + ret i1 %isEnd +} + +declare !types !42 i32 @_cont_HitKind(%struct.SystemData*) #0 + +; Function Attrs: nounwind +declare i64 @_AmdGetResumePointAddr() #1 + +; Function Attrs: nounwind +declare !types !43 void @_AmdRestoreSystemData(%struct.DispatchSystemData*) #1 + +; Function Attrs: nounwind +declare !types !44 void @_AmdRestoreSystemDataAnyHit(%struct.AnyHitTraversalData*) #1 + +; Function Attrs: nounwind +declare !types !43 void @_cont_AcceptHitAndEndSearch(%struct.DispatchSystemData* nocapture readnone) #1 + +; Function Attrs: nounwind +declare !types !44 void @_cont_AcceptHit(%struct.AnyHitTraversalData* nocapture readnone) #1 + +; Function Attrs: nounwind +declare !types !43 void @_cont_IgnoreHit(%struct.DispatchSystemData* nocapture readnone) #1 + +; Function Attrs: nounwind +declare !types !44 void @_AmdAcceptHitAttributes(%struct.AnyHitTraversalData* 
nocapture readnone) #1 + +define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, float %6, float %7, float %8, float %9, float %10, float %11, float %12, float %13) #0 !types !45 { + %dis_data = load %struct.DispatchSystemData, %struct.DispatchSystemData* %data, align 4 + %sys_data = insertvalue %struct.SystemData undef, %struct.DispatchSystemData %dis_data, 0 + %trav_data = insertvalue %struct.TraversalData undef, %struct.SystemData %sys_data, 0 + %addr = call i64 @_AmdGetResumePointAddr() #3 + %trav_data2 = insertvalue %struct.TraversalData %trav_data, i64 %addr, 5 + %newdata = call %struct.DispatchSystemData @_AmdAwaitTraversal(i64 4, %struct.TraversalData %trav_data2) + store %struct.DispatchSystemData %newdata, %struct.DispatchSystemData* %data, align 4 + call void @_AmdRestoreSystemData(%struct.DispatchSystemData* %data) + ret void +} + +define void @_cont_CallShader(%struct.DispatchSystemData* %data, i32 %0) #0 !types !46 { + %dis_data = load %struct.DispatchSystemData, %struct.DispatchSystemData* %data, align 4 + %newdata = call %struct.DispatchSystemData @_AmdAwaitShader(i64 2, %struct.DispatchSystemData %dis_data) + store %struct.DispatchSystemData %newdata, %struct.DispatchSystemData* %data, align 4 + call void @_AmdRestoreSystemData(%struct.DispatchSystemData* %data) + ret void +} + +define i1 @_cont_ReportHit(%struct.AnyHitTraversalData* %data, float %t, i32 %hitKind) #0 !types !47 { + %origTPtr = getelementptr inbounds %struct.AnyHitTraversalData, %struct.AnyHitTraversalData* %data, i32 0, i32 0, i32 4 + %origT = load float, float* %origTPtr, align 4 + %isNoHit = fcmp fast uge float %t, %origT + br i1 %isNoHit, label %isEnd, label %callAHit + +callAHit: ; preds = %0 + %trav_data = load %struct.AnyHitTraversalData, %struct.AnyHitTraversalData* %data, align 4 + %newdata = call %struct.AnyHitTraversalData @_AmdAwaitAnyHit(i64 3, %struct.AnyHitTraversalData %trav_data, float %t, i32 %hitKind) + store 
%struct.AnyHitTraversalData %newdata, %struct.AnyHitTraversalData* %data, align 4 + call void @_AmdRestoreSystemDataAnyHit(%struct.AnyHitTraversalData* %data) + ret i1 true + +isEnd: ; preds = %0 + ; Call AcceptHitAttributes, just to simulate it + call void @_AmdAcceptHitAttributes(%struct.AnyHitTraversalData* %data) + ret i1 false +} + +define <3 x i32> @_cont_DispatchRaysIndex3(%struct.DispatchSystemData* %data) !types !48 { + %resPtr.1 = getelementptr %struct.DispatchSystemData, %struct.DispatchSystemData* %data, i32 0, i32 0, i32 0 + %res.1 = load i32, i32* %resPtr.1, align 4 + %resPtr.2 = getelementptr %struct.DispatchSystemData, %struct.DispatchSystemData* %data, i32 0, i32 0, i32 1 + %res.2 = load i32, i32* %resPtr.2, align 4 + %resPtr.3 = getelementptr %struct.DispatchSystemData, %struct.DispatchSystemData* %data, i32 0, i32 0, i32 2 + %res.3 = load i32, i32* %resPtr.3, align 4 + %val.0 = insertelement <3 x i32> undef, i32 %res.1, i32 0 + %val.1 = insertelement <3 x i32> %val.0, i32 %res.2, i32 1 + %val.2 = insertelement <3 x i32> %val.1, i32 %res.3, i32 2 + ret <3 x i32> %val.2 +} + +define <3 x float> @_cont_ObjectRayOrigin3(%struct.DispatchSystemData* nocapture readnone %data, %struct.HitData* %hitData) !types !49 { + %resPtr.1 = getelementptr %struct.HitData, %struct.HitData* %hitData, i32 0, i32 0, i32 0 + %res.1 = load float, float* %resPtr.1, align 4 + %resPtr.2 = getelementptr %struct.HitData, %struct.HitData* %hitData, i32 0, i32 0, i32 1 + %res.2 = load float, float* %resPtr.2, align 4 + %resPtr.3 = getelementptr %struct.HitData, %struct.HitData* %hitData, i32 0, i32 0, i32 2 + %res.3 = load float, float* %resPtr.3, align 4 + %val.0 = insertelement <3 x float> undef, float %res.1, i32 0 + %val.1 = insertelement <3 x float> %val.0, float %res.2, i32 1 + %val.2 = insertelement <3 x float> %val.1, float %res.3, i32 2 + ret <3 x float> %val.2 +} + +define <3 x float> @_cont_ObjectRayDirection3(%struct.DispatchSystemData* nocapture readnone %data, 
%struct.HitData* %hitData) !types !49 { + %resPtr.1 = getelementptr %struct.HitData, %struct.HitData* %hitData, i32 0, i32 1, i32 0 + %res.1 = load float, float* %resPtr.1, align 4 + %resPtr.2 = getelementptr %struct.HitData, %struct.HitData* %hitData, i32 0, i32 1, i32 1 + %res.2 = load float, float* %resPtr.2, align 4 + %resPtr.3 = getelementptr %struct.HitData, %struct.HitData* %hitData, i32 0, i32 1, i32 2 + %res.3 = load float, float* %resPtr.3, align 4 + %val.0 = insertelement <3 x float> undef, float %res.1, i32 0 + %val.1 = insertelement <3 x float> %val.0, float %res.2, i32 1 + %val.2 = insertelement <3 x float> %val.1, float %res.3, i32 2 + ret <3 x float> %val.2 +} + +define float @_cont_RayTCurrent(%struct.DispatchSystemData* nocapture readnone %data, %struct.HitData* %hitData) !types !51 { + %resPtr = getelementptr %struct.HitData, %struct.HitData* %hitData, i32 0, i32 2 + %res = load float, float* %resPtr, align 4 + ret float %res +} + +; Function Attrs: nounwind +define void @MyRayGen() #2 { + %1 = load %dx.types.Handle, %dx.types.Handle* @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 + %2 = load %dx.types.Handle, %dx.types.Handle* @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 + %3 = alloca %struct.RayPayload, align 4 + %4 = bitcast %struct.RayPayload* %3 to i8* + call void @llvm.lifetime.start.p0i8(i64 16, i8* %4) #1 + %5 = getelementptr inbounds %struct.RayPayload, %struct.RayPayload* %3, i32 0, i32 0 + store <4 x float> zeroinitializer, <4 x float>* %5, align 4, !tbaa !52 + %6 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %1) + %7 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %6, %dx.types.ResourceProperties { i32 16, i32 0 }) + call void @dx.op.traceRay.struct.RayPayload(i32 157, %dx.types.Handle %7, i32 16, i32 -1, i32 0, i32 1, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0x3F50624DE0000000, float 1.000000e+00, 
float 0.000000e+00, float 0.000000e+00, float 1.000000e+04, %struct.RayPayload* nonnull %3) + %8 = load <4 x float>, <4 x float>* %5, align 4, !tbaa !52 + %9 = call i32 @dx.op.dispatchRaysIndex.i32(i32 145, i8 0) + %10 = call i32 @dx.op.dispatchRaysIndex.i32(i32 145, i8 1) + %11 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %2) + %12 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %11, %dx.types.ResourceProperties { i32 4098, i32 1033 }) + %13 = extractelement <4 x float> %8, i64 0 + %14 = extractelement <4 x float> %8, i64 1 + %15 = extractelement <4 x float> %8, i64 2 + %16 = extractelement <4 x float> %8, i64 3 + call void @dx.op.textureStore.f32(i32 67, %dx.types.Handle %12, i32 %9, i32 %10, i32 undef, float %13, float %14, float %15, float %16, i8 15) + call void @llvm.lifetime.end.p0i8(i64 16, i8* %4) #1 + ret void +} + +; Function Attrs: nounwind +define void @MyClosestHitShader(%struct.RayPayload* noalias nocapture %payload, %struct.BuiltInTriangleIntersectionAttributes* nocapture readonly %attr) #2 !types !55 { + %1 = getelementptr inbounds %struct.BuiltInTriangleIntersectionAttributes, %struct.BuiltInTriangleIntersectionAttributes* %attr, i32 0, i32 0 + %2 = load <2 x float>, <2 x float>* %1, align 4 + %3 = extractelement <2 x float> %2, i32 0 + %4 = fsub fast float 1.000000e+00, %3 + %5 = extractelement <2 x float> %2, i32 1 + %6 = fsub fast float %4, %5 + %7 = insertelement <4 x float> undef, float %6, i64 0 + %8 = insertelement <4 x float> %7, float %3, i64 1 + %9 = insertelement <4 x float> %8, float %5, i64 2 + %10 = insertelement <4 x float> %9, float 1.000000e+00, i64 3 + %11 = getelementptr inbounds %struct.RayPayload, %struct.RayPayload* %payload, i32 0, i32 0 + store <4 x float> %10, <4 x float>* %11, align 4 + ret void +} + +; Function Attrs: nounwind +define void @MyAnyHitShader(%struct.RayPayload* noalias nocapture %payload, %struct.BuiltInTriangleIntersectionAttributes* 
nocapture readnone %attr) #2 !types !55 { + %1 = getelementptr inbounds %struct.RayPayload, %struct.RayPayload* %payload, i32 0, i32 0 + %2 = load <4 x float>, <4 x float>* %1, align 4 + %3 = call float @dx.op.objectRayOrigin.f32(i32 149, i8 0) + %4 = call float @dx.op.objectRayDirection.f32(i32 150, i8 0) + %5 = call float @dx.op.rayTCurrent.f32(i32 154) + %6 = fmul fast float %5, %4 + %7 = fadd fast float %6, %3 + %8 = fcmp fast ogt float %7, 0.000000e+00 + %9 = fcmp fast ogt float %7, 1.000000e+00 + %10 = fcmp fast ogt float %7, -1.000000e+00 + br i1 %8, label %11, label %14 + +11: ; preds = %0 +; acceptHitAndEndSearch + store <4 x float> %2, <4 x float>* %1, align 4 + br i1 %9, label %12, label %13 + +12: ; preds = %11 +; acceptHitAndEndSearch with unreachable + call void @dx.op.acceptHitAndEndSearch(i32 156) + unreachable + +13: ; preds = %11 +; acceptHitAndEndSearch with ret void + call void @dx.op.acceptHitAndEndSearch(i32 156) + ret void + +14: ; preds = %0 +; IgnoreHit or AcceptHit + br i1 %10, label %15, label %18 + +15: ; preds = %14 +; IgnoreHit + br i1 %9, label %16, label %17 + +16: ; preds = %15 +; IgnoreHit with unreachable + call void @dx.op.ignoreHit(i32 155) + unreachable + +17: ; preds = %15 +; IgnoreHit with ret void (as emitted by debug mode dxc) + call void @dx.op.ignoreHit(i32 155) + ret void + +18: ; preds = %14 +; AcceptHit + store <4 x float> %2, <4 x float>* %1, align 4 + ret void +} + +; Function Attrs: nounwind +define void @MyIntersectionShader() #2 { + %1 = alloca %struct.BuiltInTriangleIntersectionAttributes, align 4 + %2 = call float @dx.op.rayTCurrent.f32(i32 154) + %3 = bitcast %struct.BuiltInTriangleIntersectionAttributes* %1 to i8* + call void @llvm.lifetime.start.p0i8(i64 8, i8* %3) #1 + %4 = call i1 @dx.op.reportHit.struct.BuiltInTriangleIntersectionAttributes(i32 158, float %2, i32 0, %struct.BuiltInTriangleIntersectionAttributes* nonnull %1) + call void @llvm.lifetime.end.p0i8(i64 8, i8* %3) #1 + ret void +} + +; Function 
Attrs: nounwind +define void @MyIntersectionShader2() #2 { + %1 = alloca %struct.BuiltInTriangleIntersectionAttributes2, align 4 + %2 = call float @dx.op.rayTCurrent.f32(i32 154) + %3 = bitcast %struct.BuiltInTriangleIntersectionAttributes2* %1 to i8* + call void @llvm.lifetime.start.p0i8(i64 8, i8* %3) #1 + %4 = call i1 @dx.op.reportHit.struct.BuiltInTriangleIntersectionAttributes2(i32 158, float %2, i32 0, %struct.BuiltInTriangleIntersectionAttributes2* nonnull %1) + call void @llvm.lifetime.end.p0i8(i64 8, i8* %3) #1 + ret void +} + +; Function Attrs: nounwind +define void @MyMissShader(%struct.RayPayload* noalias nocapture %payload) #2 !types !58 { + %1 = getelementptr inbounds %struct.RayPayload, %struct.RayPayload* %payload, i32 0, i32 0 + store <4 x float> , <4 x float>* %1, align 4 + ret void +} + +; Function Attrs: nounwind +declare !types !59 void @dx.op.traceRay.struct.RayPayload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.RayPayload*) #1 + +; Function Attrs: nounwind +declare void @dx.op.textureStore.f32(i32, %dx.types.Handle, i32, i32, i32, float, float, float, float, i8) #1 + +; Function Attrs: nounwind memory(none) +declare i32 @dx.op.dispatchRaysIndex.i32(i32, i8) #3 + +; Function Attrs: nounwind memory(none) +declare float @dx.op.objectRayDirection.f32(i32, i8) #3 + +; Function Attrs: nounwind memory(none) +declare float @dx.op.objectRayOrigin.f32(i32, i8) #3 + +; Function Attrs: nounwind memory(read) +declare float @dx.op.rayTCurrent.f32(i32) #4 + +declare void @dx.op.acceptHitAndEndSearch(i32) #0 + +declare void @dx.op.ignoreHit(i32) #0 + +; Function Attrs: nounwind +declare !types !60 i1 @dx.op.reportHit.struct.BuiltInTriangleIntersectionAttributes(i32, float, i32, %struct.BuiltInTriangleIntersectionAttributes*) #1 + +; Function Attrs: nounwind +declare !types !61 i1 @dx.op.reportHit.struct.BuiltInTriangleIntersectionAttributes2(i32, float, i32, 
%struct.BuiltInTriangleIntersectionAttributes2*) #1 + +; Function Attrs: nounwind memory(none) +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #3 + +; Function Attrs: nounwind memory(read) +declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) #4 + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare !types !63 void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #5 + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare !types !63 void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #5 + +attributes #0 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="0" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind } +attributes #2 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="0" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { nounwind memory(none) } +attributes #4 = { nounwind memory(read) } +attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } + +!llvm.ident = !{!0} +!dx.version = !{!1} +!dx.valver = !{!1} +!dx.shaderModel = !{!2} +!dx.resources = !{!3} +!dx.typeAnnotations = !{!10} +!dx.entryPoints = !{!18, !20, !23, !25, !27, !29, !31} +!lgc.cps.module = !{} + +!0 = !{!"clang version 3.7.0 (tags/RELEASE_370/final)"} +!1 = !{i32 1, i32 6} +!2 = !{!"lib", i32 6, i32 6} +!3 = !{!4, !7, null, null} +!4 = !{!5} +!5 = !{i32 0, %struct.RaytracingAccelerationStructure* bitcast (%dx.types.Handle* @"\01?Scene@@3URaytracingAccelerationStructure@@A" to %struct.RaytracingAccelerationStructure*), !"Scene", i32 0, 
i32 0, i32 1, i32 16, i32 0, !6} +!6 = !{i32 0, i32 4} +!7 = !{!8} +!8 = !{i32 0, %"class.RWTexture2D >"* bitcast (%dx.types.Handle* @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A" to %"class.RWTexture2D >"*), !"RenderTarget", i32 0, i32 0, i32 1, i32 2, i1 false, i1 false, i1 false, !9} +!9 = !{i32 0, i32 9} +!10 = !{i32 1, void ()* @MyRayGen, !11, void (%struct.RayPayload*, %struct.BuiltInTriangleIntersectionAttributes*)* @MyClosestHitShader, !14, void (%struct.RayPayload*, %struct.BuiltInTriangleIntersectionAttributes*)* @MyAnyHitShader, !14, void ()* @MyIntersectionShader, !11, void ()* @MyIntersectionShader2, !11, void (%struct.RayPayload*)* @MyMissShader, !17} +!11 = !{!12} +!12 = !{i32 1, !13, !13} +!13 = !{} +!14 = !{!12, !15, !16} +!15 = !{i32 2, !13, !13} +!16 = !{i32 0, !13, !13} +!17 = !{!12, !15} +!18 = !{null, !"", null, !3, !19} +!19 = !{i32 0, i64 65536} +!20 = !{void (%struct.RayPayload*, %struct.BuiltInTriangleIntersectionAttributes*)* @MyAnyHitShader, !"MyAnyHitShader", null, null, !21} +!21 = !{i32 8, i32 9, i32 6, i32 16, i32 7, i32 8, i32 5, !22} +!22 = !{i32 0} +!23 = !{void (%struct.RayPayload*, %struct.BuiltInTriangleIntersectionAttributes*)* @MyClosestHitShader, !"MyClosestHitShader", null, null, !24} +!24 = !{i32 8, i32 10, i32 6, i32 16, i32 7, i32 8, i32 5, !22} +!25 = !{void ()* @MyIntersectionShader, !"MyIntersectionShader", null, null, !26} +!26 = !{i32 8, i32 8, i32 5, !22} +!27 = !{void (%struct.RayPayload*)* @MyMissShader, !"MyMissShader", null, null, !28} +!28 = !{i32 8, i32 11, i32 6, i32 16, i32 5, !22} +!29 = !{void ()* @MyRayGen, !"MyRayGen", null, null, !30} +!30 = !{i32 8, i32 7, i32 5, !22} +!31 = !{void ()* @MyIntersectionShader2, !"MyIntersectionShader2", null, null, !26} +!32 = !{!"function", %struct.HitData poison, !33} +!33 = !{i32 0, %struct.AnyHitTraversalData poison} +!34 = !{!"function", %struct.HitData poison, !35} +!35 = !{i32 0, %struct.SystemData poison} +!36 = !{!"function", 
%struct.BuiltInTriangleIntersectionAttributes poison, !35} +!37 = !{!"function", !"void", !35, %struct.BuiltInTriangleIntersectionAttributes poison} +!38 = !{!"function", i32 poison, !39} +!39 = !{i32 0, %struct.DispatchSystemData poison} +!40 = !{!"function", i1 poison, !41} +!41 = !{i32 0, %struct.TraversalData poison} +!42 = !{!"function", i32 poison, !35} +!43 = !{!"function", !"void", !39} +!44 = !{!"function", !"void", !33} +!45 = !{!"function", !"void", !39, i64 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison} +!46 = !{!"function", !"void", !39, i32 poison} +!47 = !{!"function", i1 poison, !33, float poison, i32 poison} +!48 = !{!"function", <3 x i32> poison, !39} +!49 = !{!"function", <3 x float> poison, !39, !50} +!50 = !{i32 0, %struct.HitData poison} +!51 = !{!"function", float poison, !39, !50} +!52 = !{!53, !53, i64 0} +!53 = !{!"omnipotent char", !54, i64 0} +!54 = !{!"Simple C/C++ TBAA"} +!55 = !{!"function", !"void", !56, !57} +!56 = !{i32 0, %struct.RayPayload poison} +!57 = !{i32 0, %struct.BuiltInTriangleIntersectionAttributes poison} +!58 = !{!"function", !"void", !56} +!59 = !{!"function", !"void", i32 poison, %dx.types.Handle poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, !56} +!60 = !{!"function", i1 poison, i32 poison, float poison, i32 poison, !57} +!61 = !{!"function", i1 poison, i32 poison, float poison, i32 poison, !62} +!62 = !{i32 0, %struct.BuiltInTriangleIntersectionAttributes2 poison} +!63 = !{!"function", !"void", i64 poison, !64} +!64 = !{i32 0, i8 poison} +; LOWERRAYTRACINGPIPELINE-LABEL: define i32 @_cont_GetContinuationStackAddr( +; LOWERRAYTRACINGPIPELINE-SAME: ) #[[ATTR0:[0-9]+]] { +; LOWERRAYTRACINGPIPELINE-NEXT: ret i32 0 +; +; +; LOWERRAYTRACINGPIPELINE-LABEL: 
define %struct.HitData @_cont_GetCandidateState( +; LOWERRAYTRACINGPIPELINE-SAME: ptr [[DATA:%.*]]) #[[ATTR0]] { +; LOWERRAYTRACINGPIPELINE-NEXT: [[RESPTR:%.*]] = getelementptr [[STRUCT_ANYHITTRAVERSALDATA:%.*]], ptr [[DATA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[RES:%.*]] = load [[STRUCT_HITDATA:%.*]], ptr [[RESPTR]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_HITDATA]] [[RES]] +; +; +; LOWERRAYTRACINGPIPELINE-LABEL: define void @_cont_SetTriangleHitAttributes( +; LOWERRAYTRACINGPIPELINE-SAME: ptr [[DATA:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[VAL:%.*]]) { +; LOWERRAYTRACINGPIPELINE-NEXT: [[ADDR:%.*]] = getelementptr [[STRUCT_SYSTEMDATA:%.*]], ptr [[DATA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL]], ptr [[ADDR]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: ret void +; +; +; LOWERRAYTRACINGPIPELINE-LABEL: define i32 @_cont_GetLocalRootIndex( +; LOWERRAYTRACINGPIPELINE-SAME: ptr [[DATA:%.*]]) { +; LOWERRAYTRACINGPIPELINE-NEXT: ret i32 5 +; +; +; LOWERRAYTRACINGPIPELINE-LABEL: define i1 @_cont_IsEndSearch( +; LOWERRAYTRACINGPIPELINE-SAME: ptr [[TMP0:%.*]]) #[[ATTR0]] { +; LOWERRAYTRACINGPIPELINE-NEXT: [[ISEND:%.*]] = call i1 @opaqueIsEnd() +; LOWERRAYTRACINGPIPELINE-NEXT: ret i1 [[ISEND]] +; +; +; LOWERRAYTRACINGPIPELINE-LABEL: define <3 x i32> @_cont_DispatchRaysIndex3( +; LOWERRAYTRACINGPIPELINE-SAME: ptr [[DATA:%.*]]) { +; LOWERRAYTRACINGPIPELINE-NEXT: [[RESPTR_1:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[DATA]], i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[RES_1:%.*]] = load i32, ptr [[RESPTR_1]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[RESPTR_2:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[DATA]], i32 0, i32 0, i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[RES_2:%.*]] = load i32, ptr [[RESPTR_2]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[RESPTR_3:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[DATA]], 
i32 0, i32 0, i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[RES_3:%.*]] = load i32, ptr [[RESPTR_3]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[VAL_0:%.*]] = insertelement <3 x i32> undef, i32 [[RES_1]], i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[VAL_1:%.*]] = insertelement <3 x i32> [[VAL_0]], i32 [[RES_2]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[VAL_2:%.*]] = insertelement <3 x i32> [[VAL_1]], i32 [[RES_3]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: ret <3 x i32> [[VAL_2]] +; +; +; LOWERRAYTRACINGPIPELINE-LABEL: define <3 x float> @_cont_ObjectRayOrigin3( +; LOWERRAYTRACINGPIPELINE-SAME: ptr nocapture readnone [[DATA:%.*]], ptr [[HITDATA:%.*]]) { +; LOWERRAYTRACINGPIPELINE-NEXT: [[RESPTR_1:%.*]] = getelementptr [[STRUCT_HITDATA:%.*]], ptr [[HITDATA]], i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[RES_1:%.*]] = load float, ptr [[RESPTR_1]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[RESPTR_2:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[HITDATA]], i32 0, i32 0, i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[RES_2:%.*]] = load float, ptr [[RESPTR_2]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[RESPTR_3:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[HITDATA]], i32 0, i32 0, i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[RES_3:%.*]] = load float, ptr [[RESPTR_3]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[VAL_0:%.*]] = insertelement <3 x float> undef, float [[RES_1]], i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[VAL_1:%.*]] = insertelement <3 x float> [[VAL_0]], float [[RES_2]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[VAL_2:%.*]] = insertelement <3 x float> [[VAL_1]], float [[RES_3]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: ret <3 x float> [[VAL_2]] +; +; +; LOWERRAYTRACINGPIPELINE-LABEL: define <3 x float> @_cont_ObjectRayDirection3( +; LOWERRAYTRACINGPIPELINE-SAME: ptr nocapture readnone [[DATA:%.*]], ptr [[HITDATA:%.*]]) { +; LOWERRAYTRACINGPIPELINE-NEXT: [[RESPTR_1:%.*]] = getelementptr [[STRUCT_HITDATA:%.*]], ptr [[HITDATA]], i32 0, i32 1, i32 0 +; 
LOWERRAYTRACINGPIPELINE-NEXT: [[RES_1:%.*]] = load float, ptr [[RESPTR_1]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[RESPTR_2:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[HITDATA]], i32 0, i32 1, i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[RES_2:%.*]] = load float, ptr [[RESPTR_2]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[RESPTR_3:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[HITDATA]], i32 0, i32 1, i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[RES_3:%.*]] = load float, ptr [[RESPTR_3]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[VAL_0:%.*]] = insertelement <3 x float> undef, float [[RES_1]], i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[VAL_1:%.*]] = insertelement <3 x float> [[VAL_0]], float [[RES_2]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[VAL_2:%.*]] = insertelement <3 x float> [[VAL_1]], float [[RES_3]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: ret <3 x float> [[VAL_2]] +; +; +; LOWERRAYTRACINGPIPELINE-LABEL: define float @_cont_RayTCurrent( +; LOWERRAYTRACINGPIPELINE-SAME: ptr nocapture readnone [[DATA:%.*]], ptr [[HITDATA:%.*]]) { +; LOWERRAYTRACINGPIPELINE-NEXT: [[RESPTR:%.*]] = getelementptr [[STRUCT_HITDATA:%.*]], ptr [[HITDATA]], i32 0, i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[RES:%.*]] = load float, ptr [[RESPTR]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: ret float [[RES]] +; +; +; LOWERRAYTRACINGPIPELINE-LABEL: define void @MyRayGen( +; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] !lgc.rt.shaderstage [[META23:![0-9]+]] !continuation.entry [[META14:![0-9]+]] !continuation.registercount [[META23]] !continuation [[META36:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr 
@"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = bitcast ptr [[TMP4]] to ptr +; LOWERRAYTRACINGPIPELINE-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[TMP5]]) #[[ATTR1:[0-9]+]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP4]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: store <4 x float> zeroinitializer, ptr [[TMP6]], align 4, !tbaa [[TBAA37:![0-9]+]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP2]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP7]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP8]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[DIS_DATA_I:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I]], 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[ADDR_I:%.*]] = call i64 @_AmdGetResumePointAddr() #[[ATTR3:[0-9]+]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TRAV_DATA2_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], i64 [[ADDR_I]], 5 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP4]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = 
getelementptr i32, ptr [[TMP10]], i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP11]], i64 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP13]], ptr @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP10]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP14]], i64 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP16]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[TMP14]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP18]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[TMP14]], i64 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP20]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = call ptr inttoptr (i64 4 to ptr)([[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]]), !continuation.registercount [[META34:![0-9]+]], !continuation.returnedRegistercount !34 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] @await.struct.DispatchSystemData(ptr [[TMP21]]) +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_RAYPAYLOAD]] poison, ptr [[TMP4]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP4]], i32 0, i32 0 +; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[TMP23]], i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP24]], i64 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = load i32, ptr @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = getelementptr i32, ptr [[TMP23]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP27]], i64 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP29:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP30:%.*]] = getelementptr i32, ptr [[TMP27]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP31:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP32:%.*]] = getelementptr i32, ptr [[TMP27]], i64 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP33:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP33]], ptr [[TMP32]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP22]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) +; LOWERRAYTRACINGPIPELINE-NEXT: br label [[DOTSPLIT:%.*]] +; LOWERRAYTRACINGPIPELINE: .split: +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP34:%.*]] = load <4 x float>, ptr [[TMP6]], align 4, !tbaa [[TBAA37]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP35:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.index() +; 
LOWERRAYTRACINGPIPELINE-NEXT: [[EXTRACT:%.*]] = extractelement <3 x i32> [[TMP35]], i8 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP36:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.index() +; LOWERRAYTRACINGPIPELINE-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x i32> [[TMP36]], i8 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP37:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP3]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP38:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP37]], [[DX_TYPES_RESOURCEPROPERTIES]] { i32 4098, i32 1033 }) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP39:%.*]] = extractelement <4 x float> [[TMP34]], i64 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP40:%.*]] = extractelement <4 x float> [[TMP34]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP41:%.*]] = extractelement <4 x float> [[TMP34]], i64 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP42:%.*]] = extractelement <4 x float> [[TMP34]], i64 3 +; LOWERRAYTRACINGPIPELINE-NEXT: call void @dx.op.textureStore.f32(i32 67, [[DX_TYPES_HANDLE]] [[TMP38]], i32 [[EXTRACT]], i32 [[EXTRACT1]], i32 undef, float [[TMP39]], float [[TMP40]], float [[TMP41]], float [[TMP42]], i8 15) +; LOWERRAYTRACINGPIPELINE-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[TMP5]]) #[[ATTR1]] +; LOWERRAYTRACINGPIPELINE-NEXT: ret void, !continuation.registercount [[META33:![0-9]+]] +; +; +; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.DispatchSystemData @MyClosestHitShader( +; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META40:![0-9]+]] !continuation.registercount [[META34]] !continuation [[META41:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = alloca 
[[STRUCT_RAYPAYLOAD:%.*]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[HITATTRS:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_SYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP3]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP5]], i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i64 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = load i32, ptr @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP8]], ptr [[TMP7]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[TMP5]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP9]], i64 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP9]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP9]], i64 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 +; 
LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] @_cont_GetTriangleHitAttributes(ptr [[SYSTEM_DATA_ALLOCA]]) +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP16]], ptr [[TMP2]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[HITATTRS]], i64 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP19]], ptr [[TMP17]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[HITATTRS]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP22]], ptr [[TMP20]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[HITATTRS]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = load <2 x float>, ptr [[TMP23]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = extractelement <2 x float> [[TMP24]], i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = fsub fast float 1.000000e+00, [[TMP25]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = extractelement <2 x float> [[TMP24]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = fsub fast float [[TMP26]], [[TMP27]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP29:%.*]] = insertelement <4 x float> undef, float [[TMP28]], i64 0 +; LOWERRAYTRACINGPIPELINE-NEXT: 
[[TMP30:%.*]] = insertelement <4 x float> [[TMP29]], float [[TMP25]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP31:%.*]] = insertelement <4 x float> [[TMP30]], float [[TMP27]], i64 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP32:%.*]] = insertelement <4 x float> [[TMP31]], float 1.000000e+00, i64 3 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP3]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: store <4 x float> [[TMP32]], ptr [[TMP33]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP3]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP35:%.*]] = getelementptr i32, ptr [[TMP34]], i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP36:%.*]] = getelementptr i32, ptr [[TMP35]], i64 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP37]], ptr @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP38:%.*]] = getelementptr i32, ptr [[TMP34]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP39:%.*]] = getelementptr i32, ptr [[TMP38]], i64 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP40]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP41:%.*]] = getelementptr i32, ptr [[TMP38]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP42:%.*]] = load i32, ptr [[TMP41]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP42]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP43:%.*]] = getelementptr i32, ptr [[TMP38]], i64 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP44:%.*]] 
= load i32, ptr [[TMP43]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP44]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP46:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP45]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP46]], !continuation.registercount [[META34]] +; +; +; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.AnyHitTraversalData @MyAnyHitShader( +; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[TMP1:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META42:![0-9]+]] !continuation.registercount [[META34]] !continuation [[META43:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_HITDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = alloca [[STRUCT_HITDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITTRAVERSALDATA]], align 8 +; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[ORIGHITATTRS:%.*]] = alloca [8 x i32], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[HITATTRSALLOCA:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP12]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP14]], i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP15]], i64 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = load i32, ptr @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP17]], ptr [[TMP16]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP14]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[TMP18]], i64 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[TMP18]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[TMP18]], i64 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = load i32, ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] @_cont_GetTriangleHitAttributes(ptr [[TMP25]]) +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP26]], ptr [[TMP11]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i64 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i64 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP29]], ptr [[TMP27]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP32]], ptr [[TMP30]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP1]], ptr [[HITATTRSALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP12]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP34:%.*]] = load <4 x float>, ptr [[TMP33]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, 
i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[RES_I1:%.*]] = load [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_HITDATA]] [[RES_I1]], ptr [[TMP4]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[RES_1_I2:%.*]] = load float, ptr [[TMP4]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[RESPTR_2_I3:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[TMP4]], i32 0, i32 0, i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[RES_2_I4:%.*]] = load float, ptr [[RESPTR_2_I3]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[RESPTR_3_I5:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[TMP4]], i32 0, i32 0, i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[RES_3_I6:%.*]] = load float, ptr [[RESPTR_3_I5]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[VAL_0_I7:%.*]] = insertelement <3 x float> undef, float [[RES_1_I2]], i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[VAL_1_I8:%.*]] = insertelement <3 x float> [[VAL_0_I7]], float [[RES_2_I4]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[VAL_2_I9:%.*]] = insertelement <3 x float> [[VAL_1_I8]], float [[RES_3_I6]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x float> [[VAL_2_I9]], i8 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[RES_I:%.*]] = load [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_HITDATA]] [[RES_I]], ptr [[TMP5]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[RESPTR_1_I:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[TMP5]], i32 0, i32 1, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[RES_1_I:%.*]] = load float, ptr [[RESPTR_1_I]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[RESPTR_2_I:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[TMP5]], i32 0, i32 1, i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[RES_2_I:%.*]] = load float, ptr [[RESPTR_2_I]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: 
[[RESPTR_3_I:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[TMP5]], i32 0, i32 1, i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[RES_3_I:%.*]] = load float, ptr [[RESPTR_3_I]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[VAL_0_I:%.*]] = insertelement <3 x float> undef, float [[RES_1_I]], i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[VAL_1_I:%.*]] = insertelement <3 x float> [[VAL_0_I]], float [[RES_2_I]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[VAL_2_I:%.*]] = insertelement <3 x float> [[VAL_1_I]], float [[RES_3_I]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[EXTRACT:%.*]] = extractelement <3 x float> [[VAL_2_I]], i8 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[RES_I10:%.*]] = load [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_HITDATA]] [[RES_I10]], ptr [[TMP3]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[RESPTR_I:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[TMP3]], i32 0, i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[RES_I11:%.*]] = load float, ptr [[RESPTR_I]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP38:%.*]] = fmul fast float [[RES_I11]], [[EXTRACT]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP39:%.*]] = fadd fast float [[TMP38]], [[EXTRACT1]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP40:%.*]] = fcmp fast ogt float [[TMP39]], 0.000000e+00 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP41:%.*]] = fcmp fast ogt float [[TMP39]], 1.000000e+00 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP42:%.*]] = fcmp fast ogt float [[TMP39]], -1.000000e+00 +; LOWERRAYTRACINGPIPELINE-NEXT: br i1 [[TMP40]], label [[TMP43:%.*]], label [[TMP88:%.*]] +; LOWERRAYTRACINGPIPELINE: 43: +; LOWERRAYTRACINGPIPELINE-NEXT: store <4 x float> [[TMP34]], ptr [[TMP33]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: br i1 [[TMP41]], label [[TMP44:%.*]], label [[TMP66:%.*]] +; LOWERRAYTRACINGPIPELINE: 44: +; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP45]]) +; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP12]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP47:%.*]] = getelementptr i32, ptr [[TMP46]], i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP48:%.*]] = getelementptr i32, ptr [[TMP47]], i64 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP49]], ptr @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP50:%.*]] = getelementptr i32, ptr [[TMP46]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP51:%.*]] = getelementptr i32, ptr [[TMP50]], i64 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP51]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP52]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP53:%.*]] = getelementptr i32, ptr [[TMP50]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP54:%.*]] = load i32, ptr [[TMP53]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP54]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP55:%.*]] = getelementptr i32, ptr [[TMP50]], i64 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP56:%.*]] = load i32, ptr [[TMP55]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP56]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 
9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP57:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i64 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP59:%.*]] = load i32, ptr [[TMP57]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP59]], ptr [[TMP58]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP60:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP62:%.*]] = load i32, ptr [[TMP60]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP62]], ptr [[TMP61]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP63:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP10]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP63]], ptr [[TMP64]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP65:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP65]], !continuation.registercount [[META34]] +; LOWERRAYTRACINGPIPELINE: 66: +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP67]]) +; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP12]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP68]], i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP69]], i64 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP71:%.*]] = load i32, ptr [[TMP70]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP71]], ptr @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP72:%.*]] = getelementptr i32, ptr [[TMP68]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP73:%.*]] = getelementptr i32, ptr [[TMP72]], i64 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP74:%.*]] = load i32, ptr [[TMP73]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP74]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP75:%.*]] = getelementptr i32, ptr [[TMP72]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP76:%.*]] = load i32, ptr [[TMP75]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP76]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP77:%.*]] = getelementptr i32, ptr [[TMP72]], i64 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP78:%.*]] = load i32, ptr [[TMP77]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP78]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP79:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i64 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP80:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP81:%.*]] = load i32, ptr [[TMP79]], align 
4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP81]], ptr [[TMP80]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP82:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP83:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP84:%.*]] = load i32, ptr [[TMP82]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP84]], ptr [[TMP83]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP85:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP9]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP86:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP85]], ptr [[TMP86]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP87:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP87]], !continuation.registercount [[META34]] +; LOWERRAYTRACINGPIPELINE: 88: +; LOWERRAYTRACINGPIPELINE-NEXT: br i1 [[TMP42]], label [[TMP89:%.*]], label [[TMP134:%.*]] +; LOWERRAYTRACINGPIPELINE: 89: +; LOWERRAYTRACINGPIPELINE-NEXT: br i1 [[TMP41]], label [[TMP90:%.*]], label [[TMP112:%.*]] +; LOWERRAYTRACINGPIPELINE: 90: +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP91:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: call void @_cont_IgnoreHit(ptr [[TMP91]]) +; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP92:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP12]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP93:%.*]] = getelementptr i32, ptr [[TMP92]], i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP94:%.*]] = getelementptr i32, ptr [[TMP93]], i64 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP95:%.*]] = load i32, ptr [[TMP94]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP95]], ptr @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP96:%.*]] = getelementptr i32, ptr [[TMP92]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP97:%.*]] = getelementptr i32, ptr [[TMP96]], i64 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP98:%.*]] = load i32, ptr [[TMP97]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP98]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP99:%.*]] = getelementptr i32, ptr [[TMP96]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP100:%.*]] = load i32, ptr [[TMP99]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP100]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP101:%.*]] = getelementptr i32, ptr [[TMP96]], i64 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP102:%.*]] = load i32, ptr [[TMP101]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP102]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP103:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i64 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP104:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP105:%.*]] = load i32, ptr [[TMP103]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP105]], ptr 
[[TMP104]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP106:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP107:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP108:%.*]] = load i32, ptr [[TMP106]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP108]], ptr [[TMP107]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP109:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP8]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP110:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP109]], ptr [[TMP110]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP111:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP111]], !continuation.registercount [[META34]] +; LOWERRAYTRACINGPIPELINE: 112: +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP113:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: call void @_cont_IgnoreHit(ptr [[TMP113]]) +; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP114:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP12]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP115:%.*]] = getelementptr i32, ptr [[TMP114]], i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP116:%.*]] = getelementptr i32, ptr [[TMP115]], i64 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP117:%.*]] = load i32, ptr [[TMP116]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP117]], ptr @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP118:%.*]] = getelementptr i32, ptr [[TMP114]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP119:%.*]] = getelementptr i32, ptr [[TMP118]], i64 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP120:%.*]] = load i32, ptr [[TMP119]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP120]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP121:%.*]] = getelementptr i32, ptr [[TMP118]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP122:%.*]] = load i32, ptr [[TMP121]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP122]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP123:%.*]] = getelementptr i32, ptr [[TMP118]], i64 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP124:%.*]] = load i32, ptr [[TMP123]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP124]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP125:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i64 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP126:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP127:%.*]] = load i32, ptr [[TMP125]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 
[[TMP127]], ptr [[TMP126]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP128:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP129:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP130:%.*]] = load i32, ptr [[TMP128]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP130]], ptr [[TMP129]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP131:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP7]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP132:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP131]], ptr [[TMP132]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP133:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP133]], !continuation.registercount [[META34]] +; LOWERRAYTRACINGPIPELINE: 134: +; LOWERRAYTRACINGPIPELINE-NEXT: store <4 x float> [[TMP34]], ptr [[TMP33]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void @_cont_AcceptHit(ptr [[SYSTEM_DATA_ALLOCA]]) +; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP135:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP12]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP136:%.*]] = getelementptr i32, ptr [[TMP135]], i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP137:%.*]] = getelementptr i32, ptr [[TMP136]], i64 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP138:%.*]] = load i32, ptr [[TMP137]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP138]], ptr @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP139:%.*]] = getelementptr i32, ptr [[TMP135]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP140:%.*]] = getelementptr i32, ptr [[TMP139]], i64 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP141:%.*]] = load i32, ptr [[TMP140]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP141]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP142:%.*]] = getelementptr i32, ptr [[TMP139]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP143:%.*]] = load i32, ptr [[TMP142]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP143]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP144:%.*]] = getelementptr i32, ptr [[TMP139]], i64 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP145:%.*]] = load i32, ptr [[TMP144]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP145]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP146:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i64 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP147:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 0 +; LOWERRAYTRACINGPIPELINE-NEXT: 
[[TMP148:%.*]] = load i32, ptr [[TMP146]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP148]], ptr [[TMP147]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP149:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP150:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP151:%.*]] = load i32, ptr [[TMP149]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP151]], ptr [[TMP150]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP152:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP6]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP153:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP152]], ptr [[TMP153]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP154:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP154]], !continuation.registercount [[META34]] +; +; +; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.AnyHitTraversalData @MyIntersectionShader( +; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META44:![0-9]+]] !continuation.registercount [[META33]] !continuation [[META45:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITTRAVERSALDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[RES_I:%.*]] = load [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_HITDATA]] [[RES_I]], ptr [[TMP2]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[RESPTR_I:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[TMP2]], i32 0, i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[RES_I2:%.*]] = load float, ptr [[RESPTR_I]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = bitcast ptr [[TMP4]] to ptr +; LOWERRAYTRACINGPIPELINE-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[TMP7]]) #[[ATTR1]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[ORIGTPTR_I:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[ORIGT_I:%.*]] = load float, ptr [[ORIGTPTR_I]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[ISNOHIT_I:%.*]] = fcmp fast uge float [[RES_I2]], [[ORIGT_I]] +; LOWERRAYTRACINGPIPELINE-NEXT: br i1 [[ISNOHIT_I]], label [[ISEND_I:%.*]], label [[CALLAHIT_I:%.*]] +; LOWERRAYTRACINGPIPELINE: callAHit.i: +; LOWERRAYTRACINGPIPELINE-NEXT: [[TRAV_DATA_I:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP4]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = call ptr inttoptr (i64 3 to ptr)([[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I]], float [[RES_I2]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP8]]), !continuation.registercount [[META33]], !continuation.returnedRegistercount !33 
+; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = call [[STRUCT_ANYHITTRAVERSALDATA]] @await.struct.AnyHitTraversalData(ptr [[TMP9]]) +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP10]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; LOWERRAYTRACINGPIPELINE-NEXT: br label [[_CONT_REPORTHIT_EXIT:%.*]] +; LOWERRAYTRACINGPIPELINE: isEnd.i: +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP12]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP14]], ptr [[TMP13]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP17]], ptr [[TMP16]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP3]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP18]], ptr [[TMP19]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: br label [[_CONT_REPORTHIT_EXIT]] +; LOWERRAYTRACINGPIPELINE: _cont_ReportHit.exit: +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: 
[[ISEND_I1:%.*]] = call i1 @opaqueIsEnd() +; LOWERRAYTRACINGPIPELINE-NEXT: br i1 [[ISEND_I1]], label [[TMP21:%.*]], label [[TMP23:%.*]] +; LOWERRAYTRACINGPIPELINE: 21: +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP22]], !continuation.registercount [[META33]] +; LOWERRAYTRACINGPIPELINE: 23: +; LOWERRAYTRACINGPIPELINE-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[TMP7]]) #[[ATTR1]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP24]], !continuation.registercount [[META33]] +; +; +; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.AnyHitTraversalData @MyIntersectionShader2( +; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META44]] !continuation.registercount [[META33]] !continuation [[META46:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2:%.*]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITTRAVERSALDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, 
i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[RES_I:%.*]] = load [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_HITDATA]] [[RES_I]], ptr [[TMP2]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[RESPTR_I:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[TMP2]], i32 0, i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[RES_I2:%.*]] = load float, ptr [[RESPTR_I]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = bitcast ptr [[TMP4]] to ptr +; LOWERRAYTRACINGPIPELINE-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[TMP7]]) #[[ATTR1]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[ORIGTPTR_I:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[ORIGT_I:%.*]] = load float, ptr [[ORIGTPTR_I]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[ISNOHIT_I:%.*]] = fcmp fast uge float [[RES_I2]], [[ORIGT_I]] +; LOWERRAYTRACINGPIPELINE-NEXT: br i1 [[ISNOHIT_I]], label [[ISEND_I:%.*]], label [[CALLAHIT_I:%.*]] +; LOWERRAYTRACINGPIPELINE: callAHit.i: +; LOWERRAYTRACINGPIPELINE-NEXT: [[TRAV_DATA_I:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2]], ptr [[TMP4]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = call ptr inttoptr (i64 3 to ptr)([[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I]], float [[RES_I2]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2]] [[TMP8]]), !continuation.registercount [[META33]], !continuation.returnedRegistercount !33 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = call [[STRUCT_ANYHITTRAVERSALDATA]] @await.struct.AnyHitTraversalData(ptr [[TMP9]]) +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP10]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr 
[[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; LOWERRAYTRACINGPIPELINE-NEXT: br label [[_CONT_REPORTHIT_EXIT:%.*]] +; LOWERRAYTRACINGPIPELINE: isEnd.i: +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP12]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP14]], ptr [[TMP13]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP17]], ptr [[TMP16]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP3]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP18]], ptr [[TMP19]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: br label [[_CONT_REPORTHIT_EXIT]] +; LOWERRAYTRACINGPIPELINE: _cont_ReportHit.exit: +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[ISEND_I1:%.*]] = call i1 @opaqueIsEnd() +; LOWERRAYTRACINGPIPELINE-NEXT: br i1 [[ISEND_I1]], label [[TMP21:%.*]], label [[TMP23:%.*]] +; LOWERRAYTRACINGPIPELINE: 21: +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_ANYHITTRAVERSALDATA]] 
[[TMP22]], !continuation.registercount [[META33]] +; LOWERRAYTRACINGPIPELINE: 23: +; LOWERRAYTRACINGPIPELINE-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[TMP7]]) #[[ATTR1]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP24]], !continuation.registercount [[META33]] +; +; +; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.DispatchSystemData @MyMissShader( +; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META47:![0-9]+]] !continuation.registercount [[META34]] !continuation [[META48:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_SYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP2]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP5]], i64 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = load i32, ptr @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP4]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[TMP8]], i64 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_4_MISS_IN:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 
[[TMP10]], ptr [[TMP9]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP8]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_4_MISS_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP8]], i64 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_4_MISS_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP14]], ptr [[TMP13]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) +; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP2]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: store <4 x float> , ptr [[TMP15]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP2]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[TMP16]], i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP17]], i64 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP19]], ptr @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[TMP16]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[TMP20]], i64 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP22]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[TMP20]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP24]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP20]], i64 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP26]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP27]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP28]], !continuation.registercount 
[[META34]] +; +; +; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define i32 @_cont_GetContinuationStackAddr( +; LOWERRAYTRACINGPIPELINE-CPS-SAME: ) #[[ATTR0:[0-9]+]] { +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: ret i32 0 +; +; +; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define %struct.HitData @_cont_GetCandidateState( +; LOWERRAYTRACINGPIPELINE-CPS-SAME: ptr [[DATA:%.*]]) #[[ATTR0]] { +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR:%.*]] = getelementptr [[STRUCT_ANYHITTRAVERSALDATA:%.*]], ptr [[DATA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES:%.*]] = load [[STRUCT_HITDATA:%.*]], ptr [[RESPTR]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: ret [[STRUCT_HITDATA]] [[RES]] +; +; +; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define void @_cont_SetTriangleHitAttributes( +; LOWERRAYTRACINGPIPELINE-CPS-SAME: ptr [[DATA:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[VAL:%.*]]) { +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[ADDR:%.*]] = getelementptr [[STRUCT_SYSTEMDATA:%.*]], ptr [[DATA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL]], ptr [[ADDR]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: ret void +; +; +; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define i32 @_cont_GetLocalRootIndex( +; LOWERRAYTRACINGPIPELINE-CPS-SAME: ptr [[DATA:%.*]]) { +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: ret i32 5 +; +; +; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define i1 @_cont_IsEndSearch( +; LOWERRAYTRACINGPIPELINE-CPS-SAME: ptr [[TMP0:%.*]]) #[[ATTR0]] { +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[ISEND:%.*]] = call i1 @opaqueIsEnd() +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: ret i1 [[ISEND]] +; +; +; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define <3 x i32> @_cont_DispatchRaysIndex3( +; LOWERRAYTRACINGPIPELINE-CPS-SAME: ptr [[DATA:%.*]]) { +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_1:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[DATA]], i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_1:%.*]] = load i32, ptr [[RESPTR_1]], align 4 
+; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_2:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[DATA]], i32 0, i32 0, i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_2:%.*]] = load i32, ptr [[RESPTR_2]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_3:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[DATA]], i32 0, i32 0, i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_3:%.*]] = load i32, ptr [[RESPTR_3]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[VAL_0:%.*]] = insertelement <3 x i32> undef, i32 [[RES_1]], i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[VAL_1:%.*]] = insertelement <3 x i32> [[VAL_0]], i32 [[RES_2]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[VAL_2:%.*]] = insertelement <3 x i32> [[VAL_1]], i32 [[RES_3]], i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: ret <3 x i32> [[VAL_2]] +; +; +; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define <3 x float> @_cont_ObjectRayOrigin3( +; LOWERRAYTRACINGPIPELINE-CPS-SAME: ptr nocapture readnone [[DATA:%.*]], ptr [[HITDATA:%.*]]) { +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_1:%.*]] = getelementptr [[STRUCT_HITDATA:%.*]], ptr [[HITDATA]], i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_1:%.*]] = load float, ptr [[RESPTR_1]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_2:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[HITDATA]], i32 0, i32 0, i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_2:%.*]] = load float, ptr [[RESPTR_2]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_3:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[HITDATA]], i32 0, i32 0, i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_3:%.*]] = load float, ptr [[RESPTR_3]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[VAL_0:%.*]] = insertelement <3 x float> undef, float [[RES_1]], i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[VAL_1:%.*]] = insertelement <3 x float> [[VAL_0]], float [[RES_2]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[VAL_2:%.*]] = insertelement <3 x float> [[VAL_1]], float 
[[RES_3]], i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: ret <3 x float> [[VAL_2]] +; +; +; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define <3 x float> @_cont_ObjectRayDirection3( +; LOWERRAYTRACINGPIPELINE-CPS-SAME: ptr nocapture readnone [[DATA:%.*]], ptr [[HITDATA:%.*]]) { +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_1:%.*]] = getelementptr [[STRUCT_HITDATA:%.*]], ptr [[HITDATA]], i32 0, i32 1, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_1:%.*]] = load float, ptr [[RESPTR_1]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_2:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[HITDATA]], i32 0, i32 1, i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_2:%.*]] = load float, ptr [[RESPTR_2]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_3:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[HITDATA]], i32 0, i32 1, i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_3:%.*]] = load float, ptr [[RESPTR_3]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[VAL_0:%.*]] = insertelement <3 x float> undef, float [[RES_1]], i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[VAL_1:%.*]] = insertelement <3 x float> [[VAL_0]], float [[RES_2]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[VAL_2:%.*]] = insertelement <3 x float> [[VAL_1]], float [[RES_3]], i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: ret <3 x float> [[VAL_2]] +; +; +; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define float @_cont_RayTCurrent( +; LOWERRAYTRACINGPIPELINE-CPS-SAME: ptr nocapture readnone [[DATA:%.*]], ptr [[HITDATA:%.*]]) { +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR:%.*]] = getelementptr [[STRUCT_HITDATA:%.*]], ptr [[HITDATA]], i32 0, i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES:%.*]] = load float, ptr [[RESPTR]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: ret float [[RES]] +; +; +; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define void @MyRayGen( +; LOWERRAYTRACINGPIPELINE-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) 
#[[ATTR2:[0-9]+]] !lgc.rt.shaderstage [[META23:![0-9]+]] !lgc.cps [[META23]] !continuation [[META36:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP2:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP3:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP5:%.*]] = bitcast ptr [[TMP4]] to ptr +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[TMP5]]) #[[ATTR1:[0-9]+]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP4]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store <4 x float> zeroinitializer, ptr [[TMP6]], align 4, !tbaa [[TBAA37:![0-9]+]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP7:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP2]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP8:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP7]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP9:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP8]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DIS_DATA_I:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I]], 0 +; 
LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[ADDR_I:%.*]] = call i64 @_AmdGetResumePointAddr() #[[ATTR3:[0-9]+]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TRAV_DATA2_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], i64 [[ADDR_I]], 5 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP4]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP11]], i64 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP13]], ptr @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP10]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP14]], i64 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP16]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[TMP14]], i64 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP18]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[TMP14]], i64 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP20]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], 
ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP21:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] (...) @lgc.cps.await__s_struct.DispatchSystemDatas(i32 4, i32 4, i32 5), !continuation.returnedRegistercount !34, !continuation.registercount [[META34:![0-9]+]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_RAYPAYLOAD]] poison, ptr [[TMP4]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP4]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[TMP22]], i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[TMP23]], i64 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP25:%.*]] = load i32, ptr @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP22]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP27:%.*]] = getelementptr i32, ptr [[TMP26]], i64 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP28:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP28]], ptr [[TMP27]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[TMP26]], i64 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP30:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP30]], ptr [[TMP29]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP31:%.*]] = getelementptr i32, ptr [[TMP26]], i64 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP32:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 +; 
LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP32]], ptr [[TMP31]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP21]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br label [[DOTSPLIT:%.*]] +; LOWERRAYTRACINGPIPELINE-CPS: .split: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP33:%.*]] = load <4 x float>, ptr [[TMP6]], align 4, !tbaa [[TBAA37]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP34:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.index() +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[EXTRACT:%.*]] = extractelement <3 x i32> [[TMP34]], i8 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP35:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.index() +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x i32> [[TMP35]], i8 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP36:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP3]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP37:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP36]], [[DX_TYPES_RESOURCEPROPERTIES]] { i32 4098, i32 1033 }) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP38:%.*]] = extractelement <4 x float> [[TMP33]], i64 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP39:%.*]] = extractelement <4 x float> [[TMP33]], i64 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP40:%.*]] = extractelement <4 x float> [[TMP33]], i64 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP41:%.*]] = extractelement <4 x float> [[TMP33]], i64 3 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @dx.op.textureStore.f32(i32 67, [[DX_TYPES_HANDLE]] [[TMP37]], i32 [[EXTRACT]], i32 [[EXTRACT1]], i32 undef, float [[TMP38]], float [[TMP39]], float [[TMP40]], float [[TMP41]], i8 15) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[TMP5]]) #[[ATTR1]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: ret 
void +; +; +; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define void @MyClosestHitShader( +; LOWERRAYTRACINGPIPELINE-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META40:![0-9]+]] !lgc.cps [[META41:![0-9]+]] !continuation [[META42:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[HITATTRS:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_SYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP4:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] @_cont_GetTriangleHitAttributes(ptr [[SYSTEM_DATA_ALLOCA]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP4]], ptr [[TMP2]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[HITATTRS]], i64 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP7]], ptr [[TMP5]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[HITATTRS]], i64 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP10]], ptr [[TMP8]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 
[[SHADER_INDEX]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[HITATTRS]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP12:%.*]] = load <2 x float>, ptr [[TMP11]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[TMP12]], i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP14:%.*]] = fsub fast float 1.000000e+00, [[TMP13]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP15:%.*]] = extractelement <2 x float> [[TMP12]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP16:%.*]] = fsub fast float [[TMP14]], [[TMP15]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP17:%.*]] = insertelement <4 x float> undef, float [[TMP16]], i64 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP18:%.*]] = insertelement <4 x float> [[TMP17]], float [[TMP13]], i64 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP19:%.*]] = insertelement <4 x float> [[TMP18]], float [[TMP15]], i64 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP20:%.*]] = insertelement <4 x float> [[TMP19]], float 1.000000e+00, i64 3 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP3]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store <4 x float> [[TMP20]], ptr [[TMP21]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP3]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[TMP22]], i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[TMP23]], i64 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP25]], ptr @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP22]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP27:%.*]] = getelementptr i32, ptr [[TMP26]], i64 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP28]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[TMP26]], i64 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP30]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP31:%.*]] = getelementptr i32, ptr [[TMP26]], i64 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP32]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP34:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP33]], align 4 +; 
LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 6, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP34]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable +; +; +; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define void @MyAnyHitShader( +; LOWERRAYTRACINGPIPELINE-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[TMP1:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META43:![0-9]+]] !lgc.cps [[META40]] !continuation [[META44:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_HITDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP5:%.*]] = alloca [[STRUCT_HITDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP6:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP7:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP8:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP9:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP10:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP11:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITTRAVERSALDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP12:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[ORIGHITATTRS:%.*]] = alloca [8 x i32], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[HITATTRSALLOCA:%.*]] = alloca 
[[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP14:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] @_cont_GetTriangleHitAttributes(ptr [[TMP13]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP14]], ptr [[TMP11]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i64 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i64 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP17]], ptr [[TMP15]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i64 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i64 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP20]], ptr [[TMP18]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP1]], ptr [[HITATTRSALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP12]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP22:%.*]] = load <4 x float>, ptr [[TMP21]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; 
LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_I1:%.*]] = load [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_HITDATA]] [[RES_I1]], ptr [[TMP4]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_1_I2:%.*]] = load float, ptr [[TMP4]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_2_I3:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[TMP4]], i32 0, i32 0, i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_2_I4:%.*]] = load float, ptr [[RESPTR_2_I3]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_3_I5:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[TMP4]], i32 0, i32 0, i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_3_I6:%.*]] = load float, ptr [[RESPTR_3_I5]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[VAL_0_I7:%.*]] = insertelement <3 x float> undef, float [[RES_1_I2]], i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[VAL_1_I8:%.*]] = insertelement <3 x float> [[VAL_0_I7]], float [[RES_2_I4]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[VAL_2_I9:%.*]] = insertelement <3 x float> [[VAL_1_I8]], float [[RES_3_I6]], i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x float> [[VAL_2_I9]], i8 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_I:%.*]] = load [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_HITDATA]] [[RES_I]], ptr [[TMP5]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_1_I:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[TMP5]], i32 0, i32 1, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_1_I:%.*]] = load float, ptr [[RESPTR_1_I]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_2_I:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[TMP5]], i32 0, i32 1, i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_2_I:%.*]] = load float, ptr 
[[RESPTR_2_I]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_3_I:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[TMP5]], i32 0, i32 1, i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_3_I:%.*]] = load float, ptr [[RESPTR_3_I]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[VAL_0_I:%.*]] = insertelement <3 x float> undef, float [[RES_1_I]], i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[VAL_1_I:%.*]] = insertelement <3 x float> [[VAL_0_I]], float [[RES_2_I]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[VAL_2_I:%.*]] = insertelement <3 x float> [[VAL_1_I]], float [[RES_3_I]], i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[EXTRACT:%.*]] = extractelement <3 x float> [[VAL_2_I]], i8 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_I10:%.*]] = load [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_HITDATA]] [[RES_I10]], ptr [[TMP3]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_I:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[TMP3]], i32 0, i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_I11:%.*]] = load float, ptr [[RESPTR_I]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP26:%.*]] = fmul fast float [[RES_I11]], [[EXTRACT]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP27:%.*]] = fadd fast float [[TMP26]], [[EXTRACT1]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP28:%.*]] = fcmp fast ogt float [[TMP27]], 0.000000e+00 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP29:%.*]] = fcmp fast ogt float [[TMP27]], 1.000000e+00 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP30:%.*]] = fcmp fast ogt float [[TMP27]], -1.000000e+00 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br i1 [[TMP28]], label [[TMP31:%.*]], label [[TMP76:%.*]] +; LOWERRAYTRACINGPIPELINE-CPS: 31: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store <4 x float> [[TMP22]], ptr [[TMP21]], align 4 +; 
LOWERRAYTRACINGPIPELINE-CPS-NEXT: br i1 [[TMP29]], label [[TMP32:%.*]], label [[TMP54:%.*]] +; LOWERRAYTRACINGPIPELINE-CPS: 32: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP33]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP12]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP35:%.*]] = getelementptr i32, ptr [[TMP34]], i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP36:%.*]] = getelementptr i32, ptr [[TMP35]], i64 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP37]], ptr @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP38:%.*]] = getelementptr i32, ptr [[TMP34]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP39:%.*]] = getelementptr i32, ptr [[TMP38]], i64 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP40]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP41:%.*]] = getelementptr i32, ptr [[TMP38]], i64 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP42:%.*]] = load i32, ptr [[TMP41]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP42]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP43:%.*]] = getelementptr i32, ptr [[TMP38]], i64 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP44:%.*]] = load i32, ptr 
[[TMP43]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP44]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP45:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i64 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP45]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP47]], ptr [[TMP46]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i64 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP49:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP50:%.*]] = load i32, ptr [[TMP48]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP50]], ptr [[TMP49]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP51:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP10]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP51]], ptr [[TMP52]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP53:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 18, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP53]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable +; LOWERRAYTRACINGPIPELINE-CPS: 54: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP55]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP12]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP57:%.*]] = getelementptr i32, ptr [[TMP56]], i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP58:%.*]] = getelementptr i32, ptr [[TMP57]], i64 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP59:%.*]] = load i32, ptr [[TMP58]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP59]], ptr @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP60:%.*]] = getelementptr i32, ptr [[TMP56]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP61:%.*]] = getelementptr i32, ptr [[TMP60]], i64 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP62:%.*]] = load i32, ptr [[TMP61]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP62]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP63:%.*]] = getelementptr i32, ptr [[TMP60]], i64 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP64:%.*]] = load i32, ptr [[TMP63]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP64]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP65:%.*]] = getelementptr i32, ptr [[TMP60]], i64 2 
+; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP66:%.*]] = load i32, ptr [[TMP65]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP66]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP67:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i64 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP69:%.*]] = load i32, ptr [[TMP67]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP69]], ptr [[TMP68]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP70:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i64 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP72:%.*]] = load i32, ptr [[TMP70]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP72]], ptr [[TMP71]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP73:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP9]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP73]], ptr [[TMP74]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP75:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 18, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP75]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable +; LOWERRAYTRACINGPIPELINE-CPS: 76: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br i1 [[TMP30]], label [[TMP77:%.*]], label [[TMP122:%.*]] +; LOWERRAYTRACINGPIPELINE-CPS: 77: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br i1 [[TMP29]], label [[TMP78:%.*]], label [[TMP100:%.*]] +; LOWERRAYTRACINGPIPELINE-CPS: 78: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @_cont_IgnoreHit(ptr [[TMP79]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP12]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP81:%.*]] = getelementptr i32, ptr [[TMP80]], i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP82:%.*]] = getelementptr i32, ptr [[TMP81]], i64 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP83:%.*]] = load i32, ptr [[TMP82]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP83]], ptr @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP84:%.*]] = getelementptr i32, ptr [[TMP80]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP85:%.*]] = getelementptr i32, ptr [[TMP84]], i64 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP86:%.*]] = load i32, ptr [[TMP85]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP86]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP87:%.*]] = getelementptr i32, ptr [[TMP84]], i64 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP88:%.*]] = load i32, ptr [[TMP87]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP88]], ptr 
getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP89:%.*]] = getelementptr i32, ptr [[TMP84]], i64 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP90:%.*]] = load i32, ptr [[TMP89]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP90]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP91:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i64 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP92:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP93:%.*]] = load i32, ptr [[TMP91]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP93]], ptr [[TMP92]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP94:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i64 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP95:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP96:%.*]] = load i32, ptr [[TMP94]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP96]], ptr [[TMP95]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP97:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP8]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP98:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP97]], ptr [[TMP98]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP99:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 18, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP99]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable +; LOWERRAYTRACINGPIPELINE-CPS: 100: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP101:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @_cont_IgnoreHit(ptr [[TMP101]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP102:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP12]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP103:%.*]] = getelementptr i32, ptr [[TMP102]], i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP104:%.*]] = getelementptr i32, ptr [[TMP103]], i64 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP105:%.*]] = load i32, ptr [[TMP104]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP105]], ptr @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP106:%.*]] = getelementptr i32, ptr [[TMP102]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP107:%.*]] = getelementptr i32, ptr [[TMP106]], i64 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP108:%.*]] = load i32, ptr [[TMP107]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP108]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP109:%.*]] = getelementptr i32, ptr [[TMP106]], i64 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP110:%.*]] = load i32, ptr [[TMP109]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP110]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP111:%.*]] = getelementptr i32, ptr [[TMP106]], i64 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: 
[[TMP112:%.*]] = load i32, ptr [[TMP111]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP112]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP113:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i64 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP114:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP115:%.*]] = load i32, ptr [[TMP113]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP115]], ptr [[TMP114]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP116:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i64 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP117:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP118:%.*]] = load i32, ptr [[TMP116]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP118]], ptr [[TMP117]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP119:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP7]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP120:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP119]], ptr [[TMP120]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP121:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 18, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP121]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable +; LOWERRAYTRACINGPIPELINE-CPS: 122: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store <4 x float> [[TMP22]], ptr [[TMP21]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @_cont_AcceptHit(ptr [[SYSTEM_DATA_ALLOCA]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP123:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP12]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP124:%.*]] = getelementptr i32, ptr [[TMP123]], i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP125:%.*]] = getelementptr i32, ptr [[TMP124]], i64 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP126:%.*]] = load i32, ptr [[TMP125]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP126]], ptr @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP127:%.*]] = getelementptr i32, ptr [[TMP123]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP128:%.*]] = getelementptr i32, ptr [[TMP127]], i64 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP129:%.*]] = load i32, ptr [[TMP128]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP129]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP130:%.*]] = getelementptr i32, ptr [[TMP127]], i64 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP131:%.*]] = load i32, ptr [[TMP130]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP131]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP132:%.*]] = getelementptr i32, ptr [[TMP127]], i64 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: 
[[TMP133:%.*]] = load i32, ptr [[TMP132]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP133]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP134:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i64 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP135:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP136:%.*]] = load i32, ptr [[TMP134]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP136]], ptr [[TMP135]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP137:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i64 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP138:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP139:%.*]] = load i32, ptr [[TMP137]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP139]], ptr [[TMP138]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP140:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP6]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP141:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP140]], ptr [[TMP141]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP142:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 18, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP142]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable +; +; +; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define void @MyIntersectionShader( +; LOWERRAYTRACINGPIPELINE-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META41]] !lgc.cps [[META45:![0-9]+]] !continuation [[META46:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITTRAVERSALDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_I:%.*]] = load [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_HITDATA]] [[RES_I]], ptr [[TMP2]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_I:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[TMP2]], i32 0, i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_I2:%.*]] = load float, ptr [[RESPTR_I]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP6:%.*]] = bitcast ptr [[TMP4]] to ptr +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[TMP6]]) #[[ATTR1]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[ORIGTPTR_I:%.*]] = 
getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[ORIGT_I:%.*]] = load float, ptr [[ORIGTPTR_I]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[ISNOHIT_I:%.*]] = fcmp fast uge float [[RES_I2]], [[ORIGT_I]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br i1 [[ISNOHIT_I]], label [[ISEND_I:%.*]], label [[CALLAHIT_I:%.*]] +; LOWERRAYTRACINGPIPELINE-CPS: callAHit.i: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TRAV_DATA_I:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP8:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP4]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP9:%.*]] = call [[STRUCT_ANYHITTRAVERSALDATA]] (...) @lgc.cps.await__s_struct.AnyHitTraversalDatas(i32 3, i32 8, i32 5, float [[RES_I2]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP8]]), !continuation.returnedRegistercount !33, !continuation.registercount [[META33:![0-9]+]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP9]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br label [[_CONT_REPORTHIT_EXIT:%.*]] +; LOWERRAYTRACINGPIPELINE-CPS: isEnd.i: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP11]], align 4 +; 
LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP14]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP16]], ptr [[TMP15]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP17:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP3]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP17]], ptr [[TMP18]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br label [[_CONT_REPORTHIT_EXIT]] +; LOWERRAYTRACINGPIPELINE-CPS: _cont_ReportHit.exit: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[ISEND_I1:%.*]] = call i1 @opaqueIsEnd() +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br i1 [[ISEND_I1]], label [[TMP20:%.*]], label [[TMP22:%.*]] +; LOWERRAYTRACINGPIPELINE-CPS: 20: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP21:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP21]]), !continuation.registercount [[META33]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable +; LOWERRAYTRACINGPIPELINE-CPS: 22: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[TMP6]]) #[[ATTR1]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP23:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP23]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable +; +; +; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define void @MyIntersectionShader2( +; LOWERRAYTRACINGPIPELINE-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META41]] !lgc.cps [[META45]] !continuation [[META47:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2:%.*]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITTRAVERSALDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_I:%.*]] = load [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; 
LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_HITDATA]] [[RES_I]], ptr [[TMP2]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_I:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[TMP2]], i32 0, i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_I2:%.*]] = load float, ptr [[RESPTR_I]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP6:%.*]] = bitcast ptr [[TMP4]] to ptr +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[TMP6]]) #[[ATTR1]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[ORIGTPTR_I:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[ORIGT_I:%.*]] = load float, ptr [[ORIGTPTR_I]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[ISNOHIT_I:%.*]] = fcmp fast uge float [[RES_I2]], [[ORIGT_I]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br i1 [[ISNOHIT_I]], label [[ISEND_I:%.*]], label [[CALLAHIT_I:%.*]] +; LOWERRAYTRACINGPIPELINE-CPS: callAHit.i: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TRAV_DATA_I:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP8:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2]], ptr [[TMP4]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP9:%.*]] = call [[STRUCT_ANYHITTRAVERSALDATA]] (...) 
@lgc.cps.await__s_struct.AnyHitTraversalDatas(i32 3, i32 8, i32 5, float [[RES_I2]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2]] [[TMP8]]), !continuation.returnedRegistercount !33, !continuation.registercount [[META33]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP9]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br label [[_CONT_REPORTHIT_EXIT:%.*]] +; LOWERRAYTRACINGPIPELINE-CPS: isEnd.i: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP11]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP14]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP16]], ptr [[TMP15]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP17:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP3]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP17]], ptr [[TMP18]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br label [[_CONT_REPORTHIT_EXIT]] +; LOWERRAYTRACINGPIPELINE-CPS: _cont_ReportHit.exit: +; 
LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[ISEND_I1:%.*]] = call i1 @opaqueIsEnd() +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br i1 [[ISEND_I1]], label [[TMP20:%.*]], label [[TMP22:%.*]] +; LOWERRAYTRACINGPIPELINE-CPS: 20: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP21:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP21]]), !continuation.registercount [[META33]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable +; LOWERRAYTRACINGPIPELINE-CPS: 22: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[TMP6]]) #[[ATTR1]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP23:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP23]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable +; +; +; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define void @MyMissShader( +; LOWERRAYTRACINGPIPELINE-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META45]] !lgc.cps [[META41]] !continuation [[META48:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_SYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP2]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store <4 x float> , ptr [[TMP3]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP2]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP5]], i64 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP7]], ptr @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP4]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[TMP8]], i64 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP10]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP8]], i64 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP12]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP8]], i64 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP14]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP16:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP15]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 6, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP16]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable +; +; +; REGISTERBUFFER-CPS-LABEL: define i32 @_cont_GetContinuationStackAddr( +; REGISTERBUFFER-CPS-SAME: ) #[[ATTR0:[0-9]+]] { +; REGISTERBUFFER-CPS-NEXT: ret i32 0 +; +; +; REGISTERBUFFER-CPS-LABEL: define %struct.HitData @_cont_GetCandidateState( +; REGISTERBUFFER-CPS-SAME: ptr [[DATA:%.*]]) #[[ATTR0]] { +; REGISTERBUFFER-CPS-NEXT: [[RESPTR:%.*]] = getelementptr [[STRUCT_ANYHITTRAVERSALDATA:%.*]], ptr [[DATA]], i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[RES:%.*]] = load [[STRUCT_HITDATA:%.*]], ptr [[RESPTR]], align 4 +; REGISTERBUFFER-CPS-NEXT: ret [[STRUCT_HITDATA]] [[RES]] +; +; +; REGISTERBUFFER-CPS-LABEL: define void @_cont_SetTriangleHitAttributes( +; REGISTERBUFFER-CPS-SAME: ptr [[DATA:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[VAL:%.*]]) { +; REGISTERBUFFER-CPS-NEXT: [[ADDR:%.*]] = getelementptr [[STRUCT_SYSTEMDATA:%.*]], ptr [[DATA]], i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL]], ptr [[ADDR]], align 4 +; REGISTERBUFFER-CPS-NEXT: ret void +; +; +; REGISTERBUFFER-CPS-LABEL: define i32 @_cont_GetLocalRootIndex( +; REGISTERBUFFER-CPS-SAME: ptr [[DATA:%.*]]) { +; REGISTERBUFFER-CPS-NEXT: ret i32 5 +; +; +; REGISTERBUFFER-CPS-LABEL: define i1 @_cont_IsEndSearch( +; REGISTERBUFFER-CPS-SAME: ptr [[TMP0:%.*]]) #[[ATTR0]] { +; REGISTERBUFFER-CPS-NEXT: [[ISEND:%.*]] = call i1 @opaqueIsEnd() +; REGISTERBUFFER-CPS-NEXT: ret i1 [[ISEND]] +; +; +; REGISTERBUFFER-CPS-LABEL: define <3 x i32> @_cont_DispatchRaysIndex3( +; REGISTERBUFFER-CPS-SAME: ptr [[DATA:%.*]]) { +; REGISTERBUFFER-CPS-NEXT: [[RESPTR_1:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[DATA]], i32 0, i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[RES_1:%.*]] = load i32, ptr [[RESPTR_1]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[RESPTR_2:%.*]] = 
getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[DATA]], i32 0, i32 0, i32 1 +; REGISTERBUFFER-CPS-NEXT: [[RES_2:%.*]] = load i32, ptr [[RESPTR_2]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[RESPTR_3:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[DATA]], i32 0, i32 0, i32 2 +; REGISTERBUFFER-CPS-NEXT: [[RES_3:%.*]] = load i32, ptr [[RESPTR_3]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[VAL_0:%.*]] = insertelement <3 x i32> undef, i32 [[RES_1]], i32 0 +; REGISTERBUFFER-CPS-NEXT: [[VAL_1:%.*]] = insertelement <3 x i32> [[VAL_0]], i32 [[RES_2]], i32 1 +; REGISTERBUFFER-CPS-NEXT: [[VAL_2:%.*]] = insertelement <3 x i32> [[VAL_1]], i32 [[RES_3]], i32 2 +; REGISTERBUFFER-CPS-NEXT: ret <3 x i32> [[VAL_2]] +; +; +; REGISTERBUFFER-CPS-LABEL: define <3 x float> @_cont_ObjectRayOrigin3( +; REGISTERBUFFER-CPS-SAME: ptr nocapture readnone [[DATA:%.*]], ptr [[HITDATA:%.*]]) { +; REGISTERBUFFER-CPS-NEXT: [[RESPTR_1:%.*]] = getelementptr [[STRUCT_HITDATA:%.*]], ptr [[HITDATA]], i32 0, i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[RES_1:%.*]] = load float, ptr [[RESPTR_1]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[RESPTR_2:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[HITDATA]], i32 0, i32 0, i32 1 +; REGISTERBUFFER-CPS-NEXT: [[RES_2:%.*]] = load float, ptr [[RESPTR_2]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[RESPTR_3:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[HITDATA]], i32 0, i32 0, i32 2 +; REGISTERBUFFER-CPS-NEXT: [[RES_3:%.*]] = load float, ptr [[RESPTR_3]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[VAL_0:%.*]] = insertelement <3 x float> undef, float [[RES_1]], i32 0 +; REGISTERBUFFER-CPS-NEXT: [[VAL_1:%.*]] = insertelement <3 x float> [[VAL_0]], float [[RES_2]], i32 1 +; REGISTERBUFFER-CPS-NEXT: [[VAL_2:%.*]] = insertelement <3 x float> [[VAL_1]], float [[RES_3]], i32 2 +; REGISTERBUFFER-CPS-NEXT: ret <3 x float> [[VAL_2]] +; +; +; REGISTERBUFFER-CPS-LABEL: define <3 x float> @_cont_ObjectRayDirection3( +; REGISTERBUFFER-CPS-SAME: ptr nocapture readnone [[DATA:%.*]], ptr 
[[HITDATA:%.*]]) { +; REGISTERBUFFER-CPS-NEXT: [[RESPTR_1:%.*]] = getelementptr [[STRUCT_HITDATA:%.*]], ptr [[HITDATA]], i32 0, i32 1, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[RES_1:%.*]] = load float, ptr [[RESPTR_1]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[RESPTR_2:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[HITDATA]], i32 0, i32 1, i32 1 +; REGISTERBUFFER-CPS-NEXT: [[RES_2:%.*]] = load float, ptr [[RESPTR_2]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[RESPTR_3:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[HITDATA]], i32 0, i32 1, i32 2 +; REGISTERBUFFER-CPS-NEXT: [[RES_3:%.*]] = load float, ptr [[RESPTR_3]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[VAL_0:%.*]] = insertelement <3 x float> undef, float [[RES_1]], i32 0 +; REGISTERBUFFER-CPS-NEXT: [[VAL_1:%.*]] = insertelement <3 x float> [[VAL_0]], float [[RES_2]], i32 1 +; REGISTERBUFFER-CPS-NEXT: [[VAL_2:%.*]] = insertelement <3 x float> [[VAL_1]], float [[RES_3]], i32 2 +; REGISTERBUFFER-CPS-NEXT: ret <3 x float> [[VAL_2]] +; +; +; REGISTERBUFFER-CPS-LABEL: define float @_cont_RayTCurrent( +; REGISTERBUFFER-CPS-SAME: ptr nocapture readnone [[DATA:%.*]], ptr [[HITDATA:%.*]]) { +; REGISTERBUFFER-CPS-NEXT: [[RESPTR:%.*]] = getelementptr [[STRUCT_HITDATA:%.*]], ptr [[HITDATA]], i32 0, i32 2 +; REGISTERBUFFER-CPS-NEXT: [[RES:%.*]] = load float, ptr [[RESPTR]], align 4 +; REGISTERBUFFER-CPS-NEXT: ret float [[RES]] +; +; +; REGISTERBUFFER-CPS-LABEL: define void @MyRayGen( +; REGISTERBUFFER-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] !lgc.rt.shaderstage [[META22:![0-9]+]] !lgc.cps [[META22]] !continuation [[META35:![0-9]+]] { +; REGISTERBUFFER-CPS-NEXT: AllocaSpillBB: +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 +; REGISTERBUFFER-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) +; REGISTERBUFFER-CPS-NEXT: [[TMP1:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr 
@"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 +; REGISTERBUFFER-CPS-NEXT: [[TMP2:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 +; REGISTERBUFFER-CPS-NEXT: [[TMP3:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP1]]) +; REGISTERBUFFER-CPS-NEXT: [[TMP4:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP3]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; REGISTERBUFFER-CPS-NEXT: [[TMP5:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP4]]) +; REGISTERBUFFER-CPS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT]], 0 +; REGISTERBUFFER-CPS-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]], 0 +; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP6:%.*]] = call i32 (...) 
@lgc.cps.as.continuation.reference(ptr @MyRayGen.resume.0) +; REGISTERBUFFER-CPS-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 +; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA2_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], i64 [[TMP7]], 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP8:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP8]], ptr addrspace(20) @PAYLOAD, align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 1 +; REGISTERBUFFER-CPS-NEXT: [[TMP9:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP9]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 2 +; REGISTERBUFFER-CPS-NEXT: [[TMP10:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 +; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP10]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 3 +; REGISTERBUFFER-CPS-NEXT: [[TMP11:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 +; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP11]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 +; REGISTERBUFFER-CPS-NEXT: call 
void (...) @lgc.cps.jump(i32 4, i32 4, {} poison, i32 [[TMP6]], i32 5), !continuation.returnedRegistercount !33, !continuation.registercount [[META33:![0-9]+]] +; REGISTERBUFFER-CPS-NEXT: unreachable +; +; +; REGISTERBUFFER-CPS-LABEL: define dso_local void @MyRayGen.resume.0( +; REGISTERBUFFER-CPS-SAME: {} [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP3:%.*]]) !lgc.rt.shaderstage [[META22]] !lgc.cps [[META22]] !continuation [[META35]] { +; REGISTERBUFFER-CPS-NEXT: entryresume.0: +; REGISTERBUFFER-CPS-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 +; REGISTERBUFFER-CPS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP3]], ptr [[TMP4]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; REGISTERBUFFER-CPS-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP5]] to float +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> poison, float [[TMP6]], i32 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 +; REGISTERBUFFER-CPS-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP7]] to float +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP8]], i32 1 +; REGISTERBUFFER-CPS-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 +; REGISTERBUFFER-CPS-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP9]] to float +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP10]], i32 2 +; 
REGISTERBUFFER-CPS-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 +; REGISTERBUFFER-CPS-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP11]] to float +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP12]], i32 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT6:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP3]], 0 +; REGISTERBUFFER-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) +; REGISTERBUFFER-CPS-NEXT: [[TMP13:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 +; REGISTERBUFFER-CPS-NEXT: [[RES_1_I1:%.*]] = load i32, ptr [[TMP4]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[RESPTR_2_I2:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP4]], i32 0, i32 0, i32 1 +; REGISTERBUFFER-CPS-NEXT: [[RES_2_I3:%.*]] = load i32, ptr [[RESPTR_2_I2]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[RESPTR_3_I4:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP4]], i32 0, i32 0, i32 2 +; REGISTERBUFFER-CPS-NEXT: [[RES_3_I5:%.*]] = load i32, ptr [[RESPTR_3_I4]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[VAL_0_I6:%.*]] = insertelement <3 x i32> undef, i32 [[RES_1_I1]], i32 0 +; REGISTERBUFFER-CPS-NEXT: [[VAL_1_I7:%.*]] = insertelement <3 x i32> [[VAL_0_I6]], i32 [[RES_2_I3]], i32 1 +; REGISTERBUFFER-CPS-NEXT: [[VAL_2_I8:%.*]] = insertelement <3 x i32> [[VAL_1_I7]], i32 [[RES_3_I5]], i32 2 +; REGISTERBUFFER-CPS-NEXT: [[EXTRACT:%.*]] = extractelement <3 x i32> [[VAL_2_I8]], i8 0 +; REGISTERBUFFER-CPS-NEXT: [[RES_1_I:%.*]] = load i32, ptr [[TMP4]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[RESPTR_2_I:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP4]], i32 0, i32 0, i32 1 +; REGISTERBUFFER-CPS-NEXT: [[RES_2_I:%.*]] = load i32, ptr 
[[RESPTR_2_I]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[RESPTR_3_I:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP4]], i32 0, i32 0, i32 2 +; REGISTERBUFFER-CPS-NEXT: [[RES_3_I:%.*]] = load i32, ptr [[RESPTR_3_I]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[VAL_0_I:%.*]] = insertelement <3 x i32> undef, i32 [[RES_1_I]], i32 0 +; REGISTERBUFFER-CPS-NEXT: [[VAL_1_I:%.*]] = insertelement <3 x i32> [[VAL_0_I]], i32 [[RES_2_I]], i32 1 +; REGISTERBUFFER-CPS-NEXT: [[VAL_2_I:%.*]] = insertelement <3 x i32> [[VAL_1_I]], i32 [[RES_3_I]], i32 2 +; REGISTERBUFFER-CPS-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x i32> [[VAL_2_I]], i8 1 +; REGISTERBUFFER-CPS-NEXT: [[TMP14:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP13]]) +; REGISTERBUFFER-CPS-NEXT: [[TMP15:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP14]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 4098, i32 1033 }) +; REGISTERBUFFER-CPS-NEXT: [[TMP16:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP17:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 1 +; REGISTERBUFFER-CPS-NEXT: [[TMP18:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 2 +; REGISTERBUFFER-CPS-NEXT: [[TMP19:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 3 +; REGISTERBUFFER-CPS-NEXT: call void @dx.op.textureStore.f32(i32 67, [[DX_TYPES_HANDLE]] [[TMP15]], i32 [[EXTRACT]], i32 [[EXTRACT1]], i32 undef, float [[TMP16]], float [[TMP17]], float [[TMP18]], float [[TMP19]], i8 15) +; REGISTERBUFFER-CPS-NEXT: ret void +; +; +; REGISTERBUFFER-CPS-LABEL: define void @MyClosestHitShader( +; REGISTERBUFFER-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META36:![0-9]+]] !lgc.cps [[META37:![0-9]+]] !continuation [[META38:![0-9]+]] { +; 
REGISTERBUFFER-CPS-NEXT: AllocaSpillBB: +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 0, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: store <3 x i32> [[DOTFCA_0_0_EXTRACT]], ptr [[DOTFCA_0_0_GEP]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[TMP1:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] @_cont_GetTriangleHitAttributes(ptr [[SYSTEM_DATA_ALLOCA]]) +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP1]], 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_03_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP2:%.*]] = bitcast float [[DOTSROA_03_0_VEC_EXTRACT]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float +; REGISTERBUFFER-CPS-NEXT: [[HITATTRS_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP3]], i32 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_03_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 +; REGISTERBUFFER-CPS-NEXT: [[TMP4:%.*]] = bitcast float [[DOTSROA_03_4_VEC_EXTRACT]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float +; REGISTERBUFFER-CPS-NEXT: [[HITATTRS_SROA_0_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[HITATTRS_SROA_0_0_VEC_INSERT]], float [[TMP5]], i32 1 +; REGISTERBUFFER-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) +; REGISTERBUFFER-CPS-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[HITATTRS_SROA_0_4_VEC_INSERT]], i32 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP7:%.*]] = fsub fast float 1.000000e+00, [[TMP6]] +; REGISTERBUFFER-CPS-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[HITATTRS_SROA_0_4_VEC_INSERT]], 
i32 1 +; REGISTERBUFFER-CPS-NEXT: [[TMP9:%.*]] = fsub fast float [[TMP7]], [[TMP8]] +; REGISTERBUFFER-CPS-NEXT: [[TMP10:%.*]] = insertelement <4 x float> undef, float [[TMP9]], i64 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP11:%.*]] = insertelement <4 x float> [[TMP10]], float [[TMP6]], i64 1 +; REGISTERBUFFER-CPS-NEXT: [[TMP12:%.*]] = insertelement <4 x float> [[TMP11]], float [[TMP8]], i64 2 +; REGISTERBUFFER-CPS-NEXT: [[TMP13:%.*]] = insertelement <4 x float> [[TMP12]], float 1.000000e+00, i64 3 +; REGISTERBUFFER-CPS-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr)) +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP13]], i32 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP14:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP14]], ptr addrspace(20) @PAYLOAD, align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP13]], i32 1 +; REGISTERBUFFER-CPS-NEXT: [[TMP15:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP15]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP13]], i32 2 +; REGISTERBUFFER-CPS-NEXT: [[TMP16:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 +; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP16]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP13]], i32 3 +; 
REGISTERBUFFER-CPS-NEXT: [[TMP17:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 +; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP17]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 +; REGISTERBUFFER-CPS-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP18]], i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_LOAD:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_GEP]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_LOAD]], 0 +; REGISTERBUFFER-CPS-NEXT: call void @lgc.cps.free(i32 0) +; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 6, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]) +; REGISTERBUFFER-CPS-NEXT: unreachable +; +; +; REGISTERBUFFER-CPS-LABEL: define void @MyAnyHitShader( +; REGISTERBUFFER-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[TMP1:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META39:![0-9]+]] !lgc.cps [[META36]] !continuation [[META40:![0-9]+]] { +; REGISTERBUFFER-CPS-NEXT: AllocaSpillBB: +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITTRAVERSALDATA]], align 8 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 0, 0, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: 
store <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]], ptr [[DOTFCA_0_0_0_0_GEP]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; REGISTERBUFFER-CPS-NEXT: store <3 x float> [[DOTFCA_0_1_0_EXTRACT]], ptr [[DOTFCA_0_1_0_GEP]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 +; REGISTERBUFFER-CPS-NEXT: store <3 x float> [[DOTFCA_0_1_1_EXTRACT]], ptr [[DOTFCA_0_1_1_GEP]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 +; REGISTERBUFFER-CPS-NEXT: store float [[DOTFCA_0_1_2_EXTRACT]], ptr [[DOTFCA_0_1_2_GEP]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 +; REGISTERBUFFER-CPS-NEXT: store i32 [[DOTFCA_0_1_3_EXTRACT]], ptr [[DOTFCA_0_1_3_GEP]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 +; REGISTERBUFFER-CPS-NEXT: store <3 x float> [[DOTFCA_0_2_EXTRACT]], ptr [[DOTFCA_0_2_GEP]], 
align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 +; REGISTERBUFFER-CPS-NEXT: store <3 x float> [[DOTFCA_0_3_EXTRACT]], ptr [[DOTFCA_0_3_GEP]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 +; REGISTERBUFFER-CPS-NEXT: store float [[DOTFCA_0_4_EXTRACT]], ptr [[DOTFCA_0_4_GEP]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 +; REGISTERBUFFER-CPS-NEXT: store i64 [[DOTFCA_0_5_EXTRACT]], ptr [[DOTFCA_0_5_GEP]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; REGISTERBUFFER-CPS-NEXT: store <3 x float> [[DOTFCA_1_0_EXTRACT]], ptr [[DOTFCA_1_0_GEP]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; REGISTERBUFFER-CPS-NEXT: store <3 x float> [[DOTFCA_1_1_EXTRACT]], ptr [[DOTFCA_1_1_GEP]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 2 +; 
REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 +; REGISTERBUFFER-CPS-NEXT: store float [[DOTFCA_1_2_EXTRACT]], ptr [[DOTFCA_1_2_GEP]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 +; REGISTERBUFFER-CPS-NEXT: store i32 [[DOTFCA_1_3_EXTRACT]], ptr [[DOTFCA_1_3_GEP]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP3:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] @_cont_GetTriangleHitAttributes(ptr [[TMP2]]) +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT233:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP3]], 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0235_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT233]], i32 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP4:%.*]] = bitcast float [[DOTSROA_0235_0_VEC_EXTRACT]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0235_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT233]], i32 1 +; REGISTERBUFFER-CPS-NEXT: [[TMP5:%.*]] = bitcast float [[DOTSROA_0235_4_VEC_EXTRACT]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP1]], 0 +; REGISTERBUFFER-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) +; REGISTERBUFFER-CPS-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[RES_I1_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA:%.*]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; 
REGISTERBUFFER-CPS-NEXT: [[RES_I1_FCA_0_LOAD:%.*]] = load <3 x float>, ptr [[RES_I1_FCA_0_GEP]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[RES_I1_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] poison, <3 x float> [[RES_I1_FCA_0_LOAD]], 0 +; REGISTERBUFFER-CPS-NEXT: [[RES_I1_FCA_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1 +; REGISTERBUFFER-CPS-NEXT: [[RES_I1_FCA_1_LOAD:%.*]] = load <3 x float>, ptr [[RES_I1_FCA_1_GEP]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[RES_I1_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_0_INSERT]], <3 x float> [[RES_I1_FCA_1_LOAD]], 1 +; REGISTERBUFFER-CPS-NEXT: [[RES_I1_FCA_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 2 +; REGISTERBUFFER-CPS-NEXT: [[RES_I1_FCA_2_LOAD:%.*]] = load float, ptr [[RES_I1_FCA_2_GEP]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[RES_I1_FCA_2_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_1_INSERT]], float [[RES_I1_FCA_2_LOAD]], 2 +; REGISTERBUFFER-CPS-NEXT: [[RES_I1_FCA_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 3 +; REGISTERBUFFER-CPS-NEXT: [[RES_I1_FCA_3_LOAD:%.*]] = load i32, ptr [[RES_I1_FCA_3_GEP]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[RES_I1_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_2_INSERT]], i32 [[RES_I1_FCA_3_LOAD]], 3 +; REGISTERBUFFER-CPS-NEXT: [[RES_I1_FCA_3_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_3_INSERT]], 0 +; REGISTERBUFFER-CPS-NEXT: [[RES_I1_FCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_3_INSERT]], 1 +; REGISTERBUFFER-CPS-NEXT: [[RES_I1_FCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_3_INSERT]], 2 +; REGISTERBUFFER-CPS-NEXT: [[RES_I1_FCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_3_INSERT]], 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0256_0_VEC_EXTRACT:%.*]] = 
extractelement <3 x float> [[RES_I1_FCA_3_INSERT_FCA_0_EXTRACT]], i32 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0256_4_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I1_FCA_3_INSERT_FCA_0_EXTRACT]], i32 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0256_8_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I1_FCA_3_INSERT_FCA_0_EXTRACT]], i32 2 +; REGISTERBUFFER-CPS-NEXT: [[VAL_0_I7:%.*]] = insertelement <3 x float> undef, float [[DOTSROA_0256_0_VEC_EXTRACT]], i32 0 +; REGISTERBUFFER-CPS-NEXT: [[VAL_1_I8:%.*]] = insertelement <3 x float> [[VAL_0_I7]], float [[DOTSROA_0256_4_VEC_EXTRACT]], i32 1 +; REGISTERBUFFER-CPS-NEXT: [[VAL_2_I9:%.*]] = insertelement <3 x float> [[VAL_1_I8]], float [[DOTSROA_0256_8_VEC_EXTRACT]], i32 2 +; REGISTERBUFFER-CPS-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x float> [[VAL_2_I9]], i8 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_0_LOAD:%.*]] = load <3 x float>, ptr [[RES_I_FCA_0_GEP]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] poison, <3 x float> [[RES_I_FCA_0_LOAD]], 0 +; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1 +; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_1_LOAD:%.*]] = load <3 x float>, ptr [[RES_I_FCA_1_GEP]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_0_INSERT]], <3 x float> [[RES_I_FCA_1_LOAD]], 1 +; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 2 +; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_2_LOAD:%.*]] = load float, ptr [[RES_I_FCA_2_GEP]], align 4 +; 
REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_2_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_1_INSERT]], float [[RES_I_FCA_2_LOAD]], 2 +; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 3 +; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_3_LOAD:%.*]] = load i32, ptr [[RES_I_FCA_3_GEP]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_2_INSERT]], i32 [[RES_I_FCA_3_LOAD]], 3 +; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 0 +; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 1 +; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 2 +; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_1_12_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I_FCA_3_INSERT_FCA_1_EXTRACT]], i32 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_1_16_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I_FCA_3_INSERT_FCA_1_EXTRACT]], i32 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_1_20_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I_FCA_3_INSERT_FCA_1_EXTRACT]], i32 2 +; REGISTERBUFFER-CPS-NEXT: [[VAL_0_I:%.*]] = insertelement <3 x float> undef, float [[DOTSROA_1_12_VEC_EXTRACT]], i32 0 +; REGISTERBUFFER-CPS-NEXT: [[VAL_1_I:%.*]] = insertelement <3 x float> [[VAL_0_I]], float [[DOTSROA_1_16_VEC_EXTRACT]], i32 1 +; REGISTERBUFFER-CPS-NEXT: [[VAL_2_I:%.*]] = insertelement <3 x float> [[VAL_1_I]], float [[DOTSROA_1_20_VEC_EXTRACT]], i32 2 +; REGISTERBUFFER-CPS-NEXT: [[EXTRACT:%.*]] = extractelement <3 x float> [[VAL_2_I]], i8 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], 
ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[RES_I10_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[RES_I10_FCA_0_LOAD:%.*]] = load <3 x float>, ptr [[RES_I10_FCA_0_GEP]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[RES_I10_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] poison, <3 x float> [[RES_I10_FCA_0_LOAD]], 0 +; REGISTERBUFFER-CPS-NEXT: [[RES_I10_FCA_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1 +; REGISTERBUFFER-CPS-NEXT: [[RES_I10_FCA_1_LOAD:%.*]] = load <3 x float>, ptr [[RES_I10_FCA_1_GEP]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[RES_I10_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I10_FCA_0_INSERT]], <3 x float> [[RES_I10_FCA_1_LOAD]], 1 +; REGISTERBUFFER-CPS-NEXT: [[RES_I10_FCA_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 2 +; REGISTERBUFFER-CPS-NEXT: [[RES_I10_FCA_2_LOAD:%.*]] = load float, ptr [[RES_I10_FCA_2_GEP]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[RES_I10_FCA_2_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I10_FCA_1_INSERT]], float [[RES_I10_FCA_2_LOAD]], 2 +; REGISTERBUFFER-CPS-NEXT: [[RES_I10_FCA_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 3 +; REGISTERBUFFER-CPS-NEXT: [[RES_I10_FCA_3_LOAD:%.*]] = load i32, ptr [[RES_I10_FCA_3_GEP]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[RES_I10_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I10_FCA_2_INSERT]], i32 [[RES_I10_FCA_3_LOAD]], 3 +; REGISTERBUFFER-CPS-NEXT: [[RES_I10_FCA_3_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I10_FCA_3_INSERT]], 0 +; REGISTERBUFFER-CPS-NEXT: [[RES_I10_FCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I10_FCA_3_INSERT]], 1 +; REGISTERBUFFER-CPS-NEXT: [[RES_I10_FCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] 
[[RES_I10_FCA_3_INSERT]], 2 +; REGISTERBUFFER-CPS-NEXT: [[RES_I10_FCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I10_FCA_3_INSERT]], 3 +; REGISTERBUFFER-CPS-NEXT: [[TMP9:%.*]] = fmul fast float [[RES_I10_FCA_3_INSERT_FCA_2_EXTRACT]], [[EXTRACT]] +; REGISTERBUFFER-CPS-NEXT: [[TMP10:%.*]] = fadd fast float [[TMP9]], [[EXTRACT1]] +; REGISTERBUFFER-CPS-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP10]], 0.000000e+00 +; REGISTERBUFFER-CPS-NEXT: [[TMP12:%.*]] = fcmp fast ogt float [[TMP10]], 1.000000e+00 +; REGISTERBUFFER-CPS-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP10]], -1.000000e+00 +; REGISTERBUFFER-CPS-NEXT: br i1 [[TMP11]], label [[TMP14:%.*]], label [[TMP37:%.*]] +; REGISTERBUFFER-CPS: 14: +; REGISTERBUFFER-CPS-NEXT: br i1 [[TMP12]], label [[TMP15:%.*]], label [[TMP26:%.*]] +; REGISTERBUFFER-CPS: 15: +; REGISTERBUFFER-CPS-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP16]]) +; REGISTERBUFFER-CPS-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr)) +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> undef, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP17:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP17]], ptr addrspace(20) @PAYLOAD, align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> undef, i32 1 +; REGISTERBUFFER-CPS-NEXT: [[TMP18:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP18]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S:%.*]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> undef, i32 2 +; REGISTERBUFFER-CPS-NEXT: [[TMP19:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 +; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP19]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> undef, i32 3 +; REGISTERBUFFER-CPS-NEXT: [[TMP20:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 +; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP20]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 +; REGISTERBUFFER-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 +; REGISTERBUFFER-CPS-NEXT: 
[[TMP21:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[TMP22:%.*]] = bitcast i32 [[TMP21]] to float +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0237_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP22]], i32 0 +; REGISTERBUFFER-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 +; REGISTERBUFFER-CPS-NEXT: [[TMP23:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[TMP24:%.*]] = bitcast i32 [[TMP23]] to float +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0237_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0237_0_VEC_INSERT]], float [[TMP24]], i32 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0237_4_VEC_INSERT]], 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT55:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT]], 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP25]], i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT55]], ptr [[DOTFCA_0_GEP]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP56:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_LOAD:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP56]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD]], 0, 0, 0, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_GEP57:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr 
[[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP57]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_LOAD]], 0, 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_GEP58:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP58]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_LOAD]], 0, 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_GEP59:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_LOAD:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP59]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_LOAD]], 0, 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_GEP60:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_LOAD:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP60]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_LOAD]], 0, 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_GEP61:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP61]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_LOAD]], 0, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_GEP62:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP62]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_LOAD]], 0, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_GEP63:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_LOAD:%.*]] = load float, ptr [[DOTFCA_0_4_GEP63]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], float [[DOTFCA_0_4_LOAD]], 0, 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_GEP64:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_LOAD:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP64]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 [[DOTFCA_0_5_LOAD]], 0, 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_GEP65:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP65]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_LOAD]], 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_GEP66:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_LOAD:%.*]] = load <3 x float>, ptr 
[[DOTFCA_1_1_GEP66]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_LOAD]], 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_GEP67:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_LOAD:%.*]] = load float, ptr [[DOTFCA_1_2_GEP67]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[DOTFCA_1_2_LOAD]], 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_GEP68:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_LOAD:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP68]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[DOTFCA_1_3_LOAD]], 1, 3 +; REGISTERBUFFER-CPS-NEXT: call void @lgc.cps.free(i32 0) +; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 18, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]]) +; REGISTERBUFFER-CPS-NEXT: unreachable +; REGISTERBUFFER-CPS: 26: +; REGISTERBUFFER-CPS-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP27]]) +; REGISTERBUFFER-CPS-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr)) +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT24:%.*]] = extractelement <4 x float> undef, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP28:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT24]] to i32 +; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP28]], ptr addrspace(20) @PAYLOAD, align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT32:%.*]] = extractelement <4 x float> undef, i32 1 +; REGISTERBUFFER-CPS-NEXT: [[TMP29:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT32]] to i32 +; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP29]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT40:%.*]] = extractelement <4 x float> undef, i32 2 +; REGISTERBUFFER-CPS-NEXT: [[TMP30:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT40]] to i32 +; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP30]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT48:%.*]] = extractelement <4 x float> undef, i32 3 +; REGISTERBUFFER-CPS-NEXT: [[TMP31:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT48]] to i32 +; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP31]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 +; REGISTERBUFFER-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT13:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 +; 
REGISTERBUFFER-CPS-NEXT: [[TMP32:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT13]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[TMP33:%.*]] = bitcast i32 [[TMP32]] to float +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0241_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP33]], i32 0 +; REGISTERBUFFER-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT17:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 +; REGISTERBUFFER-CPS-NEXT: [[TMP34:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT17]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[TMP35:%.*]] = bitcast i32 [[TMP34]] to float +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0241_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0241_0_VEC_INSERT]], float [[TMP35]], i32 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT240:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0241_4_VEC_INSERT]], 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT69:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT240]], 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_GEP70:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP36]], i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT69]], ptr [[DOTFCA_0_GEP70]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP71:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_LOAD72:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP71]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT73:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD72]], 0, 0, 0, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_GEP74:%.*]] = getelementptr 
inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_LOAD75:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP74]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_INSERT76:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT73]], <3 x float> [[DOTFCA_0_1_0_LOAD75]], 0, 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_GEP77:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_LOAD78:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP77]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_INSERT79:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT76]], <3 x float> [[DOTFCA_0_1_1_LOAD78]], 0, 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_GEP80:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_LOAD81:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP80]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_INSERT82:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT79]], float [[DOTFCA_0_1_2_LOAD81]], 0, 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_GEP83:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_LOAD84:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP83]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_INSERT85:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT82]], i32 [[DOTFCA_0_1_3_LOAD84]], 0, 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_GEP86:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_LOAD87:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP86]], align 4 +; 
REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_INSERT88:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT85]], <3 x float> [[DOTFCA_0_2_LOAD87]], 0, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_GEP89:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_LOAD90:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP89]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_INSERT91:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT88]], <3 x float> [[DOTFCA_0_3_LOAD90]], 0, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_GEP92:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_LOAD93:%.*]] = load float, ptr [[DOTFCA_0_4_GEP92]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_INSERT94:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT91]], float [[DOTFCA_0_4_LOAD93]], 0, 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_GEP95:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_LOAD96:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP95]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_INSERT97:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT94]], i64 [[DOTFCA_0_5_LOAD96]], 0, 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_GEP98:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_LOAD99:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP98]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_INSERT100:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT97]], <3 x float> [[DOTFCA_1_0_LOAD99]], 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_GEP101:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr 
[[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_LOAD102:%.*]] = load <3 x float>, ptr [[DOTFCA_1_1_GEP101]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_INSERT103:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT100]], <3 x float> [[DOTFCA_1_1_LOAD102]], 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_GEP104:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_LOAD105:%.*]] = load float, ptr [[DOTFCA_1_2_GEP104]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_INSERT106:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT103]], float [[DOTFCA_1_2_LOAD105]], 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_GEP107:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_LOAD108:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP107]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_INSERT109:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT106]], i32 [[DOTFCA_1_3_LOAD108]], 1, 3 +; REGISTERBUFFER-CPS-NEXT: call void @lgc.cps.free(i32 0) +; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 18, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT109]]) +; REGISTERBUFFER-CPS-NEXT: unreachable +; REGISTERBUFFER-CPS: 37: +; REGISTERBUFFER-CPS-NEXT: br i1 [[TMP13]], label [[TMP38:%.*]], label [[TMP57:%.*]] +; REGISTERBUFFER-CPS: 38: +; REGISTERBUFFER-CPS-NEXT: br i1 [[TMP12]], label [[TMP39:%.*]], label [[TMP48:%.*]] +; REGISTERBUFFER-CPS: 39: +; REGISTERBUFFER-CPS-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: call void @_cont_IgnoreHit(ptr [[TMP40]]) +; REGISTERBUFFER-CPS-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr)) +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT26:%.*]] = extractelement <4 x float> undef, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP41:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT26]] to i32 +; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP41]], ptr addrspace(20) @PAYLOAD, align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT34:%.*]] = extractelement <4 x float> undef, i32 1 +; REGISTERBUFFER-CPS-NEXT: [[TMP42:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT34]] to i32 +; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP42]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN:%.*]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT42:%.*]] = extractelement <4 x float> undef, i32 2 +; REGISTERBUFFER-CPS-NEXT: [[TMP43:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT42]] to i32 +; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP43]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT50:%.*]] = extractelement <4 x float> undef, i32 3 +; REGISTERBUFFER-CPS-NEXT: [[TMP44:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT50]] to i32 +; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP44]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 +; REGISTERBUFFER-CPS-NEXT: [[TMP45:%.*]] = bitcast i32 [[TMP4]] to float +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0245_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP45]], i32 0 +; 
REGISTERBUFFER-CPS-NEXT: [[TMP46:%.*]] = bitcast i32 [[TMP5]] to float +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0245_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0245_0_VEC_INSERT]], float [[TMP46]], i32 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT244:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0245_4_VEC_INSERT]], 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT110:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT244]], 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_GEP111:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP47]], i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT110]], ptr [[DOTFCA_0_GEP111]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP112:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_LOAD113:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP112]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT114:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD113]], 0, 0, 0, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_GEP115:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_LOAD116:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP115]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_INSERT117:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT114]], <3 x float> [[DOTFCA_0_1_0_LOAD116]], 0, 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_GEP118:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], 
i32 0, i32 0, i32 1, i32 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_LOAD119:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP118]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_INSERT120:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT117]], <3 x float> [[DOTFCA_0_1_1_LOAD119]], 0, 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_GEP121:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_LOAD122:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP121]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_INSERT123:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT120]], float [[DOTFCA_0_1_2_LOAD122]], 0, 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_GEP124:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_LOAD125:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP124]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_INSERT126:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT123]], i32 [[DOTFCA_0_1_3_LOAD125]], 0, 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_GEP127:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_LOAD128:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP127]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_INSERT129:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT126]], <3 x float> [[DOTFCA_0_2_LOAD128]], 0, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_GEP130:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_LOAD131:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP130]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_INSERT132:%.*]] = insertvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT129]], <3 x float> [[DOTFCA_0_3_LOAD131]], 0, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_GEP133:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_LOAD134:%.*]] = load float, ptr [[DOTFCA_0_4_GEP133]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_INSERT135:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT132]], float [[DOTFCA_0_4_LOAD134]], 0, 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_GEP136:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_LOAD137:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP136]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_INSERT138:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT135]], i64 [[DOTFCA_0_5_LOAD137]], 0, 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_GEP139:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_LOAD140:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP139]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_INSERT141:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT138]], <3 x float> [[DOTFCA_1_0_LOAD140]], 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_GEP142:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_LOAD143:%.*]] = load <3 x float>, ptr [[DOTFCA_1_1_GEP142]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_INSERT144:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT141]], <3 x float> [[DOTFCA_1_1_LOAD143]], 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_GEP145:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 +; 
REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_LOAD146:%.*]] = load float, ptr [[DOTFCA_1_2_GEP145]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_INSERT147:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT144]], float [[DOTFCA_1_2_LOAD146]], 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_GEP148:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_LOAD149:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP148]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_INSERT150:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT147]], i32 [[DOTFCA_1_3_LOAD149]], 1, 3 +; REGISTERBUFFER-CPS-NEXT: call void @lgc.cps.free(i32 0) +; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 18, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT150]]) +; REGISTERBUFFER-CPS-NEXT: unreachable +; REGISTERBUFFER-CPS: 48: +; REGISTERBUFFER-CPS-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: call void @_cont_IgnoreHit(ptr [[TMP49]]) +; REGISTERBUFFER-CPS-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr)) +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT28:%.*]] = extractelement <4 x float> undef, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP50:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT28]] to i32 +; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP50]], ptr addrspace(20) @PAYLOAD, align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT36:%.*]] = extractelement <4 x float> undef, i32 1 +; REGISTERBUFFER-CPS-NEXT: [[TMP51:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT36]] to i32 +; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP51]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT44:%.*]] = extractelement <4 x float> undef, i32 2 +; REGISTERBUFFER-CPS-NEXT: [[TMP52:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT44]] to i32 +; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP52]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT52:%.*]] = extractelement <4 x float> undef, i32 3 +; REGISTERBUFFER-CPS-NEXT: [[TMP53:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT52]] to i32 +; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP53]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 +; REGISTERBUFFER-CPS-NEXT: [[TMP54:%.*]] = bitcast i32 [[TMP4]] to float +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0249_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP54]], i32 0 +; 
REGISTERBUFFER-CPS-NEXT: [[TMP55:%.*]] = bitcast i32 [[TMP5]] to float +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0249_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0249_0_VEC_INSERT]], float [[TMP55]], i32 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT248:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0249_4_VEC_INSERT]], 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT151:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT248]], 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_GEP152:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP56]], i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT151]], ptr [[DOTFCA_0_GEP152]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP153:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_LOAD154:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP153]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT155:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD154]], 0, 0, 0, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_GEP156:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_LOAD157:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP156]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_INSERT158:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT155]], <3 x float> [[DOTFCA_0_1_0_LOAD157]], 0, 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_GEP159:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], 
i32 0, i32 0, i32 1, i32 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_LOAD160:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP159]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_INSERT161:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT158]], <3 x float> [[DOTFCA_0_1_1_LOAD160]], 0, 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_GEP162:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_LOAD163:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP162]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_INSERT164:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT161]], float [[DOTFCA_0_1_2_LOAD163]], 0, 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_GEP165:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_LOAD166:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP165]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_INSERT167:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT164]], i32 [[DOTFCA_0_1_3_LOAD166]], 0, 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_GEP168:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_LOAD169:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP168]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_INSERT170:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT167]], <3 x float> [[DOTFCA_0_2_LOAD169]], 0, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_GEP171:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_LOAD172:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP171]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_INSERT173:%.*]] = insertvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT170]], <3 x float> [[DOTFCA_0_3_LOAD172]], 0, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_GEP174:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_LOAD175:%.*]] = load float, ptr [[DOTFCA_0_4_GEP174]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_INSERT176:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT173]], float [[DOTFCA_0_4_LOAD175]], 0, 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_GEP177:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_LOAD178:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP177]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_INSERT179:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT176]], i64 [[DOTFCA_0_5_LOAD178]], 0, 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_GEP180:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_LOAD181:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP180]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_INSERT182:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT179]], <3 x float> [[DOTFCA_1_0_LOAD181]], 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_GEP183:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_LOAD184:%.*]] = load <3 x float>, ptr [[DOTFCA_1_1_GEP183]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_INSERT185:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT182]], <3 x float> [[DOTFCA_1_1_LOAD184]], 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_GEP186:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 +; 
REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_LOAD187:%.*]] = load float, ptr [[DOTFCA_1_2_GEP186]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_INSERT188:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT185]], float [[DOTFCA_1_2_LOAD187]], 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_GEP189:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_LOAD190:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP189]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_INSERT191:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT188]], i32 [[DOTFCA_1_3_LOAD190]], 1, 3 +; REGISTERBUFFER-CPS-NEXT: call void @lgc.cps.free(i32 0) +; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 18, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT191]]) +; REGISTERBUFFER-CPS-NEXT: unreachable +; REGISTERBUFFER-CPS: 57: +; REGISTERBUFFER-CPS-NEXT: call void @_cont_AcceptHit(ptr [[SYSTEM_DATA_ALLOCA]]) +; REGISTERBUFFER-CPS-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr)) +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT30:%.*]] = extractelement <4 x float> undef, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP58:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT30]] to i32 +; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP58]], ptr addrspace(20) @PAYLOAD, align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT38:%.*]] = extractelement <4 x float> undef, i32 1 +; REGISTERBUFFER-CPS-NEXT: [[TMP59:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT38]] to i32 +; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP59]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S:%.*]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT46:%.*]] = extractelement <4 x float> undef, i32 2 +; REGISTERBUFFER-CPS-NEXT: [[TMP60:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT46]] to i32 +; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP60]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT54:%.*]] = extractelement <4 x float> undef, i32 3 +; REGISTERBUFFER-CPS-NEXT: [[TMP61:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT54]] to i32 +; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP61]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 +; REGISTERBUFFER-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT15:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 
+; REGISTERBUFFER-CPS-NEXT: [[TMP62:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT15]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[TMP63:%.*]] = bitcast i32 [[TMP62]] to float +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0253_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP63]], i32 0 +; REGISTERBUFFER-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT19:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 +; REGISTERBUFFER-CPS-NEXT: [[TMP64:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT19]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[TMP65:%.*]] = bitcast i32 [[TMP64]] to float +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0253_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0253_0_VEC_INSERT]], float [[TMP65]], i32 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT252:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0253_4_VEC_INSERT]], 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT192:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT252]], 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_GEP193:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP66]], i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT192]], ptr [[DOTFCA_0_GEP193]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP194:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_LOAD195:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP194]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT196:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD195]], 0, 0, 0, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_GEP197:%.*]] = 
getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_LOAD198:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP197]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_INSERT199:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT196]], <3 x float> [[DOTFCA_0_1_0_LOAD198]], 0, 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_GEP200:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_LOAD201:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP200]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_INSERT202:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT199]], <3 x float> [[DOTFCA_0_1_1_LOAD201]], 0, 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_GEP203:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_LOAD204:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP203]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_INSERT205:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT202]], float [[DOTFCA_0_1_2_LOAD204]], 0, 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_GEP206:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_LOAD207:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP206]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_INSERT208:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT205]], i32 [[DOTFCA_0_1_3_LOAD207]], 0, 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_GEP209:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_LOAD210:%.*]] = load <3 x float>, ptr 
[[DOTFCA_0_2_GEP209]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_INSERT211:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT208]], <3 x float> [[DOTFCA_0_2_LOAD210]], 0, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_GEP212:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_LOAD213:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP212]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_INSERT214:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT211]], <3 x float> [[DOTFCA_0_3_LOAD213]], 0, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_GEP215:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_LOAD216:%.*]] = load float, ptr [[DOTFCA_0_4_GEP215]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_INSERT217:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT214]], float [[DOTFCA_0_4_LOAD216]], 0, 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_GEP218:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_LOAD219:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP218]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_INSERT220:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT217]], i64 [[DOTFCA_0_5_LOAD219]], 0, 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_GEP221:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_LOAD222:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP221]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_INSERT223:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT220]], <3 x float> [[DOTFCA_1_0_LOAD222]], 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_GEP224:%.*]] = getelementptr 
inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_LOAD225:%.*]] = load <3 x float>, ptr [[DOTFCA_1_1_GEP224]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_INSERT226:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT223]], <3 x float> [[DOTFCA_1_1_LOAD225]], 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_GEP227:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_LOAD228:%.*]] = load float, ptr [[DOTFCA_1_2_GEP227]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_INSERT229:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT226]], float [[DOTFCA_1_2_LOAD228]], 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_GEP230:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_LOAD231:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP230]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_INSERT232:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT229]], i32 [[DOTFCA_1_3_LOAD231]], 1, 3 +; REGISTERBUFFER-CPS-NEXT: call void @lgc.cps.free(i32 0) +; REGISTERBUFFER-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 18, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT232]]) +; REGISTERBUFFER-CPS-NEXT: unreachable +; +; +; REGISTERBUFFER-CPS-LABEL: define void @MyIntersectionShader( +; REGISTERBUFFER-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META37]] !lgc.cps [[META41:![0-9]+]] !continuation [[META42:![0-9]+]] { +; REGISTERBUFFER-CPS-NEXT: AllocaSpillBB: +; REGISTERBUFFER-CPS-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) +; REGISTERBUFFER-CPS-NEXT: [[RETURN_ADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[MYINTERSECTIONSHADER_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: store i32 [[RETURN_ADDR]], ptr addrspace(32) [[RETURN_ADDR_SPILL_ADDR]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 0, 0, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_EXTRACT:%.*]] = extractvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 3 +; REGISTERBUFFER-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) +; REGISTERBUFFER-CPS-NEXT: [[TMP1:%.*]] = bitcast <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]] to <3 x float> +; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA:%.*]] poison, <3 x float> [[TMP1]], 0 +; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 1 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_13_24_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[DOTFCA_0_1_1_EXTRACT]], i32 0 +; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_2_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_1_INSERT]], float [[SYSTEM_DATA_ALLOCA_SROA_13_24_VEC_EXTRACT]], 2 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_13_28_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[DOTFCA_0_1_1_EXTRACT]], i32 1 +; REGISTERBUFFER-CPS-NEXT: [[TMP2:%.*]] = bitcast float [[SYSTEM_DATA_ALLOCA_SROA_13_28_VEC_EXTRACT]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_2_INSERT]], i32 [[TMP2]], 3 +; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 0 +; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 1 +; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue 
[[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 2 +; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 3 +; REGISTERBUFFER-CPS-NEXT: [[ISNOHIT_I:%.*]] = fcmp fast uge float [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT]], [[DOTFCA_0_4_EXTRACT]] +; REGISTERBUFFER-CPS-NEXT: br i1 [[ISNOHIT_I]], label [[ISEND_I:%.*]], label [[CALLAHIT_I:%.*]] +; REGISTERBUFFER-CPS: callAHit.i: +; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]], 0, 0, 0, 0 +; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 +; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 +; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_3_INSERT]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 +; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] 
[[TRAV_DATA_I_FCA_0_4_INSERT]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 +; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> undef, 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP3:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference(ptr @MyIntersectionShader.resume.0) +; REGISTERBUFFER-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 3, i32 8, {} poison, i32 [[TMP3]], i32 5, float [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT]]), !continuation.returnedRegistercount !32, !continuation.registercount [[META32:![0-9]+]] +; REGISTERBUFFER-CPS-NEXT: unreachable +; REGISTERBUFFER-CPS: isEnd.i: +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP4:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0107_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP5]], i32 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 1 +; REGISTERBUFFER-CPS-NEXT: [[TMP6:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[TMP7:%.*]] = bitcast i32 [[TMP6]] to float +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0107_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0107_0_VEC_INSERT]], float [[TMP7]], i32 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT106:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0107_4_VEC_INSERT]], 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT106]], 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP8:%.*]] = bitcast <2 x float> [[DOTFCA_0_EXTRACT]] to <2 x i32> +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_0_0_VEC_EXPAND:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> poison, <3 x i32> +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND:%.*]] = select <3 x i1> , <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VEC_EXPAND]], <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]] +; REGISTERBUFFER-CPS-NEXT: [[ISEND_I1:%.*]] = call i1 @opaqueIsEnd() +; REGISTERBUFFER-CPS-NEXT: br i1 [[ISEND_I1]], label 
[[TMP9:%.*]], label [[TMP10:%.*]] +; REGISTERBUFFER-CPS: 9: +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT44:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND]], 0, 0, 0, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_INSERT47:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT44]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_INSERT50:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT47]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_INSERT53:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT50]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_INSERT56:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT53]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_INSERT59:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT56]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_INSERT62:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT59]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_INSERT65:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT62]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_INSERT68:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT65]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_INSERT71:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT68]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_INSERT74:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT71]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_INSERT77:%.*]] = insertvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT74]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_INSERT80:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT77]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 +; REGISTERBUFFER-CPS-NEXT: call void @lgc.cps.free(i32 8) +; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT80]]), !continuation.registercount [[META32]] +; REGISTERBUFFER-CPS-NEXT: unreachable +; REGISTERBUFFER-CPS: 10: +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND]], 0, 0, 0, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 +; REGISTERBUFFER-CPS-NEXT: call void @lgc.cps.free(i32 8) +; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]]) +; REGISTERBUFFER-CPS-NEXT: unreachable +; +; +; REGISTERBUFFER-CPS-LABEL: define dso_local void @MyIntersectionShader.resume.0( +; REGISTERBUFFER-CPS-SAME: {} [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP3:%.*]]) !lgc.rt.shaderstage [[META37]] !lgc.cps [[META41]] !continuation [[META42]] { +; REGISTERBUFFER-CPS-NEXT: entryresume.0: +; REGISTERBUFFER-CPS-NEXT: [[TMP4:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 8) +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT16:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 0, 0, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_EXTRACT18:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_EXTRACT20:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_EXTRACT22:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_EXTRACT24:%.*]] = extractvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_EXTRACT26:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_EXTRACT28:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_EXTRACT30:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_EXTRACT32:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_EXTRACT34:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_EXTRACT36:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_EXTRACT38:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_EXTRACT40:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 1, 3 +; REGISTERBUFFER-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; REGISTERBUFFER-CPS-NEXT: [[ISEND_I1:%.*]] = call i1 @opaqueIsEnd() +; REGISTERBUFFER-CPS-NEXT: br i1 [[ISEND_I1]], label [[TMP5:%.*]], label [[TMP6:%.*]] +; REGISTERBUFFER-CPS: 5: +; REGISTERBUFFER-CPS-NEXT: [[RETURN_ADDR_RELOAD_ADDR1:%.*]] = getelementptr inbounds [[MYINTERSECTIONSHADER_FRAME:%.*]], ptr addrspace(32) [[TMP4]], i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[RETURN_ADDR_RELOAD2:%.*]] = load i32, ptr addrspace(32) [[RETURN_ADDR_RELOAD_ADDR1]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT44:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT16]], 0, 0, 0, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_INSERT47:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT44]], <3 x float> [[DOTFCA_0_1_0_EXTRACT18]], 0, 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_INSERT50:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] 
[[DOTFCA_0_1_0_INSERT47]], <3 x float> [[DOTFCA_0_1_1_EXTRACT20]], 0, 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_INSERT53:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT50]], float [[DOTFCA_0_1_2_EXTRACT22]], 0, 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_INSERT56:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT53]], i32 [[DOTFCA_0_1_3_EXTRACT24]], 0, 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_INSERT59:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT56]], <3 x float> [[DOTFCA_0_2_EXTRACT26]], 0, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_INSERT62:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT59]], <3 x float> [[DOTFCA_0_3_EXTRACT28]], 0, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_INSERT65:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT62]], float [[DOTFCA_0_4_EXTRACT30]], 0, 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_INSERT68:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT65]], i64 [[DOTFCA_0_5_EXTRACT32]], 0, 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_INSERT71:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT68]], <3 x float> [[DOTFCA_1_0_EXTRACT34]], 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_INSERT74:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT71]], <3 x float> [[DOTFCA_1_1_EXTRACT36]], 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_INSERT77:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT74]], float [[DOTFCA_1_2_EXTRACT38]], 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_INSERT80:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT77]], i32 [[DOTFCA_1_3_EXTRACT40]], 1, 3 +; REGISTERBUFFER-CPS-NEXT: call void @lgc.cps.free(i32 8) +; REGISTERBUFFER-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR_RELOAD2]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT80]]), !continuation.registercount [[META32]] +; REGISTERBUFFER-CPS-NEXT: unreachable +; REGISTERBUFFER-CPS: 6: +; REGISTERBUFFER-CPS-NEXT: [[RETURN_ADDR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[MYINTERSECTIONSHADER_FRAME]], ptr addrspace(32) [[TMP4]], i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[RETURN_ADDR_RELOAD:%.*]] = load i32, ptr addrspace(32) [[RETURN_ADDR_RELOAD_ADDR]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT16]], 0, 0, 0, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT18]], 0, 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_EXTRACT20]], 0, 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_EXTRACT22]], 0, 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_EXTRACT24]], 0, 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_EXTRACT26]], 0, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_EXTRACT28]], 0, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], float [[DOTFCA_0_4_EXTRACT30]], 0, 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 
[[DOTFCA_0_5_EXTRACT32]], 0, 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_EXTRACT34]], 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT36]], 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT38]], 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT40]], 1, 3 +; REGISTERBUFFER-CPS-NEXT: call void @lgc.cps.free(i32 8) +; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR_RELOAD]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]]) +; REGISTERBUFFER-CPS-NEXT: unreachable +; +; +; REGISTERBUFFER-CPS-LABEL: define void @MyIntersectionShader2( +; REGISTERBUFFER-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META37]] !lgc.cps [[META41]] !continuation [[META43:![0-9]+]] { +; REGISTERBUFFER-CPS-NEXT: AllocaSpillBB: +; REGISTERBUFFER-CPS-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) +; REGISTERBUFFER-CPS-NEXT: [[RETURN_ADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[MYINTERSECTIONSHADER2_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: store i32 [[RETURN_ADDR]], ptr addrspace(32) [[RETURN_ADDR_SPILL_ADDR]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 0, 0, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 0 +; REGISTERBUFFER-CPS-NEXT: 
[[DOTFCA_0_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 3 +; REGISTERBUFFER-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) +; REGISTERBUFFER-CPS-NEXT: [[TMP1:%.*]] = bitcast <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]] to <3 x float> +; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA:%.*]] poison, <3 x float> [[TMP1]], 0 +; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 1 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_13_24_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[DOTFCA_0_1_1_EXTRACT]], i32 0 +; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_2_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_1_INSERT]], float 
[[SYSTEM_DATA_ALLOCA_SROA_13_24_VEC_EXTRACT]], 2 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_13_28_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[DOTFCA_0_1_1_EXTRACT]], i32 1 +; REGISTERBUFFER-CPS-NEXT: [[TMP2:%.*]] = bitcast float [[SYSTEM_DATA_ALLOCA_SROA_13_28_VEC_EXTRACT]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_2_INSERT]], i32 [[TMP2]], 3 +; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 0 +; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 1 +; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 2 +; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 3 +; REGISTERBUFFER-CPS-NEXT: [[ISNOHIT_I:%.*]] = fcmp fast uge float [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT]], [[DOTFCA_0_4_EXTRACT]] +; REGISTERBUFFER-CPS-NEXT: br i1 [[ISNOHIT_I]], label [[ISEND_I:%.*]], label [[CALLAHIT_I:%.*]] +; REGISTERBUFFER-CPS: callAHit.i: +; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]], 0, 0, 0, 0 +; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 +; REGISTERBUFFER-CPS-NEXT: 
[[TRAV_DATA_I_FCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 +; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 +; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_3_INSERT]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 +; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_4_INSERT]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 +; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2:%.*]] poison, <2 x float> undef, 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP3:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference(ptr @MyIntersectionShader2.resume.0) +; REGISTERBUFFER-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 3, i32 8, {} poison, i32 [[TMP3]], i32 5, float [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2]] [[DOTFCA_0_INSERT]]), !continuation.returnedRegistercount !32, !continuation.registercount [[META32]] +; REGISTERBUFFER-CPS-NEXT: unreachable +; REGISTERBUFFER-CPS: isEnd.i: +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP4:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0107_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP5]], i32 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 1 +; REGISTERBUFFER-CPS-NEXT: [[TMP6:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[TMP7:%.*]] = bitcast i32 [[TMP6]] to float +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0107_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0107_0_VEC_INSERT]], float [[TMP7]], i32 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT106:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> [[DOTSROA_0107_4_VEC_INSERT]], 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT106]], 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP8:%.*]] = bitcast <2 x float> [[DOTFCA_0_EXTRACT]] to <2 x i32> +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_0_0_VEC_EXPAND:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> poison, <3 x i32> +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND:%.*]] = select <3 x i1> , <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VEC_EXPAND]], <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]] +; REGISTERBUFFER-CPS-NEXT: [[ISEND_I1:%.*]] = call i1 @opaqueIsEnd() +; REGISTERBUFFER-CPS-NEXT: br i1 [[ISEND_I1]], label 
[[TMP9:%.*]], label [[TMP10:%.*]] +; REGISTERBUFFER-CPS: 9: +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT44:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND]], 0, 0, 0, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_INSERT47:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT44]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_INSERT50:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT47]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_INSERT53:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT50]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_INSERT56:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT53]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_INSERT59:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT56]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_INSERT62:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT59]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_INSERT65:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT62]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_INSERT68:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT65]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_INSERT71:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT68]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_INSERT74:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT71]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_INSERT77:%.*]] = insertvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT74]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_INSERT80:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT77]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 +; REGISTERBUFFER-CPS-NEXT: call void @lgc.cps.free(i32 8) +; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT80]]), !continuation.registercount [[META32]] +; REGISTERBUFFER-CPS-NEXT: unreachable +; REGISTERBUFFER-CPS: 10: +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND]], 0, 0, 0, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 +; REGISTERBUFFER-CPS-NEXT: call void @lgc.cps.free(i32 8) +; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]]) +; REGISTERBUFFER-CPS-NEXT: unreachable +; +; +; REGISTERBUFFER-CPS-LABEL: define dso_local void @MyIntersectionShader2.resume.0( +; REGISTERBUFFER-CPS-SAME: {} [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP3:%.*]]) !lgc.rt.shaderstage [[META37]] !lgc.cps [[META41]] !continuation [[META43]] { +; REGISTERBUFFER-CPS-NEXT: entryresume.0: +; REGISTERBUFFER-CPS-NEXT: [[TMP4:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 8) +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT16:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 0, 0, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_EXTRACT18:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_EXTRACT20:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_EXTRACT22:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_EXTRACT24:%.*]] = extractvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_EXTRACT26:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_EXTRACT28:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_EXTRACT30:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_EXTRACT32:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_EXTRACT34:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_EXTRACT36:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_EXTRACT38:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_EXTRACT40:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 1, 3 +; REGISTERBUFFER-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; REGISTERBUFFER-CPS-NEXT: [[ISEND_I1:%.*]] = call i1 @opaqueIsEnd() +; REGISTERBUFFER-CPS-NEXT: br i1 [[ISEND_I1]], label [[TMP5:%.*]], label [[TMP6:%.*]] +; REGISTERBUFFER-CPS: 5: +; REGISTERBUFFER-CPS-NEXT: [[RETURN_ADDR_RELOAD_ADDR1:%.*]] = getelementptr inbounds [[MYINTERSECTIONSHADER2_FRAME:%.*]], ptr addrspace(32) [[TMP4]], i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[RETURN_ADDR_RELOAD2:%.*]] = load i32, ptr addrspace(32) [[RETURN_ADDR_RELOAD_ADDR1]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT44:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT16]], 0, 0, 0, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_INSERT47:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT44]], <3 x float> [[DOTFCA_0_1_0_EXTRACT18]], 0, 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_INSERT50:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] 
[[DOTFCA_0_1_0_INSERT47]], <3 x float> [[DOTFCA_0_1_1_EXTRACT20]], 0, 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_INSERT53:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT50]], float [[DOTFCA_0_1_2_EXTRACT22]], 0, 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_INSERT56:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT53]], i32 [[DOTFCA_0_1_3_EXTRACT24]], 0, 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_INSERT59:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT56]], <3 x float> [[DOTFCA_0_2_EXTRACT26]], 0, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_INSERT62:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT59]], <3 x float> [[DOTFCA_0_3_EXTRACT28]], 0, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_INSERT65:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT62]], float [[DOTFCA_0_4_EXTRACT30]], 0, 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_INSERT68:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT65]], i64 [[DOTFCA_0_5_EXTRACT32]], 0, 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_INSERT71:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT68]], <3 x float> [[DOTFCA_1_0_EXTRACT34]], 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_INSERT74:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT71]], <3 x float> [[DOTFCA_1_1_EXTRACT36]], 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_INSERT77:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT74]], float [[DOTFCA_1_2_EXTRACT38]], 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_INSERT80:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT77]], i32 [[DOTFCA_1_3_EXTRACT40]], 1, 3 +; REGISTERBUFFER-CPS-NEXT: call void @lgc.cps.free(i32 8) +; REGISTERBUFFER-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR_RELOAD2]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT80]]), !continuation.registercount [[META32]] +; REGISTERBUFFER-CPS-NEXT: unreachable +; REGISTERBUFFER-CPS: 6: +; REGISTERBUFFER-CPS-NEXT: [[RETURN_ADDR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[MYINTERSECTIONSHADER2_FRAME]], ptr addrspace(32) [[TMP4]], i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[RETURN_ADDR_RELOAD:%.*]] = load i32, ptr addrspace(32) [[RETURN_ADDR_RELOAD_ADDR]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT16]], 0, 0, 0, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT18]], 0, 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_EXTRACT20]], 0, 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_EXTRACT22]], 0, 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_EXTRACT24]], 0, 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_EXTRACT26]], 0, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_EXTRACT28]], 0, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], float [[DOTFCA_0_4_EXTRACT30]], 0, 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 
[[DOTFCA_0_5_EXTRACT32]], 0, 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_EXTRACT34]], 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT36]], 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT38]], 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT40]], 1, 3 +; REGISTERBUFFER-CPS-NEXT: call void @lgc.cps.free(i32 8) +; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR_RELOAD]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]]) +; REGISTERBUFFER-CPS-NEXT: unreachable +; +; +; REGISTERBUFFER-CPS-LABEL: define void @MyMissShader( +; REGISTERBUFFER-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META41]] !lgc.cps [[META37]] !continuation [[META44:![0-9]+]] { +; REGISTERBUFFER-CPS-NEXT: AllocaSpillBB: +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 0, 0 +; REGISTERBUFFER-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) +; REGISTERBUFFER-CPS-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr)) +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP1:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP1]], ptr addrspace(20) @PAYLOAD, align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 1 +; REGISTERBUFFER-CPS-NEXT: [[TMP2:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP2]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 2 +; REGISTERBUFFER-CPS-NEXT: [[TMP3:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 +; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP3]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 3 +; REGISTERBUFFER-CPS-NEXT: [[TMP4:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 +; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP4]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison, <3 x i32> [[DOTFCA_0_0_EXTRACT]], 0 +; REGISTERBUFFER-CPS-NEXT: call void @lgc.cps.free(i32 0) +; REGISTERBUFFER-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 6, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]) +; REGISTERBUFFER-CPS-NEXT: unreachable +; +; +; POSTPROCESS-CPS-LABEL: define i32 @_cont_GetContinuationStackAddr( +; POSTPROCESS-CPS-SAME: ) #[[ATTR0:[0-9]+]] { +; POSTPROCESS-CPS-NEXT: ret i32 0 +; +; +; POSTPROCESS-CPS-LABEL: define %struct.HitData @_cont_GetCandidateState( +; POSTPROCESS-CPS-SAME: ptr [[DATA:%.*]]) #[[ATTR0]] { +; POSTPROCESS-CPS-NEXT: [[RESPTR:%.*]] = getelementptr [[STRUCT_ANYHITTRAVERSALDATA:%.*]], ptr [[DATA]], i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: [[RES:%.*]] = load [[STRUCT_HITDATA:%.*]], ptr [[RESPTR]], align 4 +; POSTPROCESS-CPS-NEXT: ret [[STRUCT_HITDATA]] [[RES]] +; +; +; POSTPROCESS-CPS-LABEL: define void @_cont_SetTriangleHitAttributes( +; POSTPROCESS-CPS-SAME: ptr [[DATA:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[VAL:%.*]]) { +; POSTPROCESS-CPS-NEXT: [[ADDR:%.*]] = getelementptr [[STRUCT_SYSTEMDATA:%.*]], ptr [[DATA]], i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL]], ptr [[ADDR]], align 4 +; POSTPROCESS-CPS-NEXT: ret void +; +; +; POSTPROCESS-CPS-LABEL: define i32 @_cont_GetLocalRootIndex( +; POSTPROCESS-CPS-SAME: ptr [[DATA:%.*]]) { +; POSTPROCESS-CPS-NEXT: ret i32 5 +; +; +; POSTPROCESS-CPS-LABEL: define i1 @_cont_IsEndSearch( +; POSTPROCESS-CPS-SAME: ptr [[TMP0:%.*]]) #[[ATTR0]] { +; POSTPROCESS-CPS-NEXT: [[ISEND:%.*]] = call i1 @opaqueIsEnd() +; POSTPROCESS-CPS-NEXT: ret i1 [[ISEND]] +; +; +; POSTPROCESS-CPS-LABEL: define <3 x i32> @_cont_DispatchRaysIndex3( +; POSTPROCESS-CPS-SAME: ptr [[DATA:%.*]]) { +; POSTPROCESS-CPS-NEXT: [[RESPTR_1:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[DATA]], i32 0, i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: [[RES_1:%.*]] = load i32, ptr [[RESPTR_1]], align 4 +; POSTPROCESS-CPS-NEXT: [[RESPTR_2:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[DATA]], i32 0, i32 0, i32 1 +; 
POSTPROCESS-CPS-NEXT: [[RES_2:%.*]] = load i32, ptr [[RESPTR_2]], align 4 +; POSTPROCESS-CPS-NEXT: [[RESPTR_3:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[DATA]], i32 0, i32 0, i32 2 +; POSTPROCESS-CPS-NEXT: [[RES_3:%.*]] = load i32, ptr [[RESPTR_3]], align 4 +; POSTPROCESS-CPS-NEXT: [[VAL_0:%.*]] = insertelement <3 x i32> undef, i32 [[RES_1]], i32 0 +; POSTPROCESS-CPS-NEXT: [[VAL_1:%.*]] = insertelement <3 x i32> [[VAL_0]], i32 [[RES_2]], i32 1 +; POSTPROCESS-CPS-NEXT: [[VAL_2:%.*]] = insertelement <3 x i32> [[VAL_1]], i32 [[RES_3]], i32 2 +; POSTPROCESS-CPS-NEXT: ret <3 x i32> [[VAL_2]] +; +; +; POSTPROCESS-CPS-LABEL: define <3 x float> @_cont_ObjectRayOrigin3( +; POSTPROCESS-CPS-SAME: ptr nocapture readnone [[DATA:%.*]], ptr [[HITDATA:%.*]]) { +; POSTPROCESS-CPS-NEXT: [[RESPTR_1:%.*]] = getelementptr [[STRUCT_HITDATA:%.*]], ptr [[HITDATA]], i32 0, i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: [[RES_1:%.*]] = load float, ptr [[RESPTR_1]], align 4 +; POSTPROCESS-CPS-NEXT: [[RESPTR_2:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[HITDATA]], i32 0, i32 0, i32 1 +; POSTPROCESS-CPS-NEXT: [[RES_2:%.*]] = load float, ptr [[RESPTR_2]], align 4 +; POSTPROCESS-CPS-NEXT: [[RESPTR_3:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[HITDATA]], i32 0, i32 0, i32 2 +; POSTPROCESS-CPS-NEXT: [[RES_3:%.*]] = load float, ptr [[RESPTR_3]], align 4 +; POSTPROCESS-CPS-NEXT: [[VAL_0:%.*]] = insertelement <3 x float> undef, float [[RES_1]], i32 0 +; POSTPROCESS-CPS-NEXT: [[VAL_1:%.*]] = insertelement <3 x float> [[VAL_0]], float [[RES_2]], i32 1 +; POSTPROCESS-CPS-NEXT: [[VAL_2:%.*]] = insertelement <3 x float> [[VAL_1]], float [[RES_3]], i32 2 +; POSTPROCESS-CPS-NEXT: ret <3 x float> [[VAL_2]] +; +; +; POSTPROCESS-CPS-LABEL: define <3 x float> @_cont_ObjectRayDirection3( +; POSTPROCESS-CPS-SAME: ptr nocapture readnone [[DATA:%.*]], ptr [[HITDATA:%.*]]) { +; POSTPROCESS-CPS-NEXT: [[RESPTR_1:%.*]] = getelementptr [[STRUCT_HITDATA:%.*]], ptr [[HITDATA]], i32 0, i32 1, i32 0 +; 
POSTPROCESS-CPS-NEXT: [[RES_1:%.*]] = load float, ptr [[RESPTR_1]], align 4 +; POSTPROCESS-CPS-NEXT: [[RESPTR_2:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[HITDATA]], i32 0, i32 1, i32 1 +; POSTPROCESS-CPS-NEXT: [[RES_2:%.*]] = load float, ptr [[RESPTR_2]], align 4 +; POSTPROCESS-CPS-NEXT: [[RESPTR_3:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[HITDATA]], i32 0, i32 1, i32 2 +; POSTPROCESS-CPS-NEXT: [[RES_3:%.*]] = load float, ptr [[RESPTR_3]], align 4 +; POSTPROCESS-CPS-NEXT: [[VAL_0:%.*]] = insertelement <3 x float> undef, float [[RES_1]], i32 0 +; POSTPROCESS-CPS-NEXT: [[VAL_1:%.*]] = insertelement <3 x float> [[VAL_0]], float [[RES_2]], i32 1 +; POSTPROCESS-CPS-NEXT: [[VAL_2:%.*]] = insertelement <3 x float> [[VAL_1]], float [[RES_3]], i32 2 +; POSTPROCESS-CPS-NEXT: ret <3 x float> [[VAL_2]] +; +; +; POSTPROCESS-CPS-LABEL: define float @_cont_RayTCurrent( +; POSTPROCESS-CPS-SAME: ptr nocapture readnone [[DATA:%.*]], ptr [[HITDATA:%.*]]) { +; POSTPROCESS-CPS-NEXT: [[RESPTR:%.*]] = getelementptr [[STRUCT_HITDATA:%.*]], ptr [[HITDATA]], i32 0, i32 2 +; POSTPROCESS-CPS-NEXT: [[RES:%.*]] = load float, ptr [[RESPTR]], align 4 +; POSTPROCESS-CPS-NEXT: ret float [[RES]] +; +; +; POSTPROCESS-CPS-LABEL: define void @MyRayGen( +; POSTPROCESS-CPS-SAME: {} [[TMP0:%.*]], i32 [[CSPINIT:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]]) #[[ATTR2:[0-9]+]] !lgc.rt.shaderstage [[META22:![0-9]+]] !lgc.cps [[META22]] !continuation [[META35:![0-9]+]] { +; POSTPROCESS-CPS-NEXT: AllocaSpillBB: +; POSTPROCESS-CPS-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; POSTPROCESS-CPS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA:%.*]] @_cont_SetupRayGen() +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[SYSTEM_DATA]], 0 +; POSTPROCESS-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) +; POSTPROCESS-CPS-NEXT: [[TMP3:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr 
@"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 +; POSTPROCESS-CPS-NEXT: [[TMP4:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 +; POSTPROCESS-CPS-NEXT: [[TMP5:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP3]]) +; POSTPROCESS-CPS-NEXT: [[TMP6:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP5]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; POSTPROCESS-CPS-NEXT: [[TMP7:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP6]]) +; POSTPROCESS-CPS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT]], 0 +; POSTPROCESS-CPS-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]], 0 +; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 +; POSTPROCESS-CPS-NEXT: [[TMP8:%.*]] = call i32 (...) 
@lgc.cps.as.continuation.reference(ptr @MyRayGen.resume.0) +; POSTPROCESS-CPS-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +; POSTPROCESS-CPS-NEXT: [[TRAV_DATA2_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], i64 [[TMP9]], 5 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 0 +; POSTPROCESS-CPS-NEXT: [[TMP10:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP10]], ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 1 +; POSTPROCESS-CPS-NEXT: [[TMP11:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP11]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 2 +; POSTPROCESS-CPS-NEXT: [[TMP12:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP12]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 3 +; POSTPROCESS-CPS-NEXT: [[TMP13:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP13]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 +; POSTPROCESS-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 4, i32 4, {} poison, i32 [[TMP8]], i32 5), !continuation.returnedRegistercount !33, !continuation.registercount [[META33:![0-9]+]] +; POSTPROCESS-CPS-NEXT: unreachable +; +; +; POSTPROCESS-CPS-LABEL: define dso_local void @MyRayGen.resume.0( +; POSTPROCESS-CPS-SAME: {} [[TMP0:%.*]], i32 [[CSPINIT:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP3:%.*]]) !lgc.rt.shaderstage [[META22]] !lgc.cps [[META22]] !continuation [[META35]] { +; POSTPROCESS-CPS-NEXT: entryresume.0: +; POSTPROCESS-CPS-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 +; POSTPROCESS-CPS-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; POSTPROCESS-CPS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP3]], ptr [[TMP4]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-CPS-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP5]] to float +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> poison, float [[TMP6]], i32 0 +; POSTPROCESS-CPS-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 +; POSTPROCESS-CPS-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP7]] to float +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP8]], i32 1 +; POSTPROCESS-CPS-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 +; POSTPROCESS-CPS-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP9]] to float +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = 
insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP10]], i32 2 +; POSTPROCESS-CPS-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 +; POSTPROCESS-CPS-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP11]] to float +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP12]], i32 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT6:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP3]], 0 +; POSTPROCESS-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) +; POSTPROCESS-CPS-NEXT: [[TMP13:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 +; POSTPROCESS-CPS-NEXT: [[RES_1_I1:%.*]] = load i32, ptr [[TMP4]], align 4 +; POSTPROCESS-CPS-NEXT: [[RESPTR_2_I2:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP4]], i32 0, i32 0, i32 1 +; POSTPROCESS-CPS-NEXT: [[RES_2_I3:%.*]] = load i32, ptr [[RESPTR_2_I2]], align 4 +; POSTPROCESS-CPS-NEXT: [[RESPTR_3_I4:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP4]], i32 0, i32 0, i32 2 +; POSTPROCESS-CPS-NEXT: [[RES_3_I5:%.*]] = load i32, ptr [[RESPTR_3_I4]], align 4 +; POSTPROCESS-CPS-NEXT: [[VAL_0_I6:%.*]] = insertelement <3 x i32> undef, i32 [[RES_1_I1]], i32 0 +; POSTPROCESS-CPS-NEXT: [[VAL_1_I7:%.*]] = insertelement <3 x i32> [[VAL_0_I6]], i32 [[RES_2_I3]], i32 1 +; POSTPROCESS-CPS-NEXT: [[VAL_2_I8:%.*]] = insertelement <3 x i32> [[VAL_1_I7]], i32 [[RES_3_I5]], i32 2 +; POSTPROCESS-CPS-NEXT: [[EXTRACT:%.*]] = extractelement <3 x i32> [[VAL_2_I8]], i8 0 +; POSTPROCESS-CPS-NEXT: [[RES_1_I:%.*]] = load i32, ptr [[TMP4]], align 4 +; POSTPROCESS-CPS-NEXT: [[RESPTR_2_I:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP4]], i32 0, i32 0, i32 1 +; POSTPROCESS-CPS-NEXT: 
[[RES_2_I:%.*]] = load i32, ptr [[RESPTR_2_I]], align 4 +; POSTPROCESS-CPS-NEXT: [[RESPTR_3_I:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP4]], i32 0, i32 0, i32 2 +; POSTPROCESS-CPS-NEXT: [[RES_3_I:%.*]] = load i32, ptr [[RESPTR_3_I]], align 4 +; POSTPROCESS-CPS-NEXT: [[VAL_0_I:%.*]] = insertelement <3 x i32> undef, i32 [[RES_1_I]], i32 0 +; POSTPROCESS-CPS-NEXT: [[VAL_1_I:%.*]] = insertelement <3 x i32> [[VAL_0_I]], i32 [[RES_2_I]], i32 1 +; POSTPROCESS-CPS-NEXT: [[VAL_2_I:%.*]] = insertelement <3 x i32> [[VAL_1_I]], i32 [[RES_3_I]], i32 2 +; POSTPROCESS-CPS-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x i32> [[VAL_2_I]], i8 1 +; POSTPROCESS-CPS-NEXT: [[TMP14:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP13]]) +; POSTPROCESS-CPS-NEXT: [[TMP15:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP14]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 4098, i32 1033 }) +; POSTPROCESS-CPS-NEXT: [[TMP16:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 0 +; POSTPROCESS-CPS-NEXT: [[TMP17:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 1 +; POSTPROCESS-CPS-NEXT: [[TMP18:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 2 +; POSTPROCESS-CPS-NEXT: [[TMP19:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 3 +; POSTPROCESS-CPS-NEXT: call void @dx.op.textureStore.f32(i32 67, [[DX_TYPES_HANDLE]] [[TMP15]], i32 [[EXTRACT]], i32 [[EXTRACT1]], i32 undef, float [[TMP16]], float [[TMP17]], float [[TMP18]], float [[TMP19]], i8 15) +; POSTPROCESS-CPS-NEXT: ret void +; +; +; POSTPROCESS-CPS-LABEL: define void @MyClosestHitShader( +; POSTPROCESS-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[CSPINIT:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META36:![0-9]+]] !lgc.cps [[META37:![0-9]+]] !continuation [[META38:![0-9]+]] { +; 
POSTPROCESS-CPS-NEXT: AllocaSpillBB: +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 +; POSTPROCESS-CPS-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; POSTPROCESS-CPS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 0, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: store <3 x i32> [[DOTFCA_0_0_EXTRACT]], ptr [[DOTFCA_0_0_GEP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP1:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] @_cont_GetTriangleHitAttributes(ptr [[SYSTEM_DATA_ALLOCA]]) +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP1]], 0 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_03_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 +; POSTPROCESS-CPS-NEXT: [[TMP2:%.*]] = bitcast float [[DOTSROA_03_0_VEC_EXTRACT]] to i32 +; POSTPROCESS-CPS-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float +; POSTPROCESS-CPS-NEXT: [[HITATTRS_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP3]], i32 0 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_03_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 +; POSTPROCESS-CPS-NEXT: [[TMP4:%.*]] = bitcast float [[DOTSROA_03_4_VEC_EXTRACT]] to i32 +; POSTPROCESS-CPS-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float +; POSTPROCESS-CPS-NEXT: [[HITATTRS_SROA_0_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[HITATTRS_SROA_0_0_VEC_INSERT]], float [[TMP5]], i32 1 +; POSTPROCESS-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) +; POSTPROCESS-CPS-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[HITATTRS_SROA_0_4_VEC_INSERT]], i32 0 +; POSTPROCESS-CPS-NEXT: [[TMP7:%.*]] = fsub fast float 1.000000e+00, [[TMP6]] +; POSTPROCESS-CPS-NEXT: 
[[TMP8:%.*]] = extractelement <2 x float> [[HITATTRS_SROA_0_4_VEC_INSERT]], i32 1 +; POSTPROCESS-CPS-NEXT: [[TMP9:%.*]] = fsub fast float [[TMP7]], [[TMP8]] +; POSTPROCESS-CPS-NEXT: [[TMP10:%.*]] = insertelement <4 x float> undef, float [[TMP9]], i64 0 +; POSTPROCESS-CPS-NEXT: [[TMP11:%.*]] = insertelement <4 x float> [[TMP10]], float [[TMP6]], i64 1 +; POSTPROCESS-CPS-NEXT: [[TMP12:%.*]] = insertelement <4 x float> [[TMP11]], float [[TMP8]], i64 2 +; POSTPROCESS-CPS-NEXT: [[TMP13:%.*]] = insertelement <4 x float> [[TMP12]], float 1.000000e+00, i64 3 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP13]], i32 0 +; POSTPROCESS-CPS-NEXT: [[TMP14:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP14]], ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP13]], i32 1 +; POSTPROCESS-CPS-NEXT: [[TMP15:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP15]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP13]], i32 2 +; POSTPROCESS-CPS-NEXT: [[TMP16:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP16]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP13]], i32 3 +; POSTPROCESS-CPS-NEXT: [[TMP17:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 +; POSTPROCESS-CPS-NEXT: store 
i32 [[TMP17]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 +; POSTPROCESS-CPS-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP18]], i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_LOAD:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_GEP]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_LOAD]], 0 +; POSTPROCESS-CPS-NEXT: [[TMP19:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP20:%.*]] = add i32 [[TMP19]], 0 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP20]], ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 6, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]) +; POSTPROCESS-CPS-NEXT: unreachable +; +; +; POSTPROCESS-CPS-LABEL: define void @MyAnyHitShader( +; POSTPROCESS-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[CSPINIT:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[TMP1:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META39:![0-9]+]] !lgc.cps [[META36]] !continuation [[META40:![0-9]+]] { +; POSTPROCESS-CPS-NEXT: AllocaSpillBB: +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITTRAVERSALDATA]], align 8 +; POSTPROCESS-CPS-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; POSTPROCESS-CPS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 0, 0, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP:%.*]] = getelementptr 
inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: store <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]], ptr [[DOTFCA_0_0_0_0_GEP]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; POSTPROCESS-CPS-NEXT: store <3 x float> [[DOTFCA_0_1_0_EXTRACT]], ptr [[DOTFCA_0_1_0_GEP]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 +; POSTPROCESS-CPS-NEXT: store <3 x float> [[DOTFCA_0_1_1_EXTRACT]], ptr [[DOTFCA_0_1_1_GEP]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 +; POSTPROCESS-CPS-NEXT: store float [[DOTFCA_0_1_2_EXTRACT]], ptr [[DOTFCA_0_1_2_GEP]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 +; POSTPROCESS-CPS-NEXT: store i32 [[DOTFCA_0_1_3_EXTRACT]], ptr [[DOTFCA_0_1_3_GEP]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 +; 
POSTPROCESS-CPS-NEXT: store <3 x float> [[DOTFCA_0_2_EXTRACT]], ptr [[DOTFCA_0_2_GEP]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 +; POSTPROCESS-CPS-NEXT: store <3 x float> [[DOTFCA_0_3_EXTRACT]], ptr [[DOTFCA_0_3_GEP]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 +; POSTPROCESS-CPS-NEXT: store float [[DOTFCA_0_4_EXTRACT]], ptr [[DOTFCA_0_4_GEP]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 +; POSTPROCESS-CPS-NEXT: store i64 [[DOTFCA_0_5_EXTRACT]], ptr [[DOTFCA_0_5_GEP]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; POSTPROCESS-CPS-NEXT: store <3 x float> [[DOTFCA_1_0_EXTRACT]], ptr [[DOTFCA_1_0_GEP]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; POSTPROCESS-CPS-NEXT: store <3 x float> [[DOTFCA_1_1_EXTRACT]], ptr [[DOTFCA_1_1_GEP]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_EXTRACT:%.*]] = extractvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 +; POSTPROCESS-CPS-NEXT: store float [[DOTFCA_1_2_EXTRACT]], ptr [[DOTFCA_1_2_GEP]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 +; POSTPROCESS-CPS-NEXT: store i32 [[DOTFCA_1_3_EXTRACT]], ptr [[DOTFCA_1_3_GEP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: [[TMP3:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] @_cont_GetTriangleHitAttributes(ptr [[TMP2]]) +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT233:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP3]], 0 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0235_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT233]], i32 0 +; POSTPROCESS-CPS-NEXT: [[TMP4:%.*]] = bitcast float [[DOTSROA_0235_0_VEC_EXTRACT]] to i32 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0235_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT233]], i32 1 +; POSTPROCESS-CPS-NEXT: [[TMP5:%.*]] = bitcast float [[DOTSROA_0235_4_VEC_EXTRACT]] to i32 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP1]], 0 +; POSTPROCESS-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) +; POSTPROCESS-CPS-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: [[RES_I1_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA:%.*]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; 
POSTPROCESS-CPS-NEXT: [[RES_I1_FCA_0_LOAD:%.*]] = load <3 x float>, ptr [[RES_I1_FCA_0_GEP]], align 4 +; POSTPROCESS-CPS-NEXT: [[RES_I1_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] poison, <3 x float> [[RES_I1_FCA_0_LOAD]], 0 +; POSTPROCESS-CPS-NEXT: [[RES_I1_FCA_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1 +; POSTPROCESS-CPS-NEXT: [[RES_I1_FCA_1_LOAD:%.*]] = load <3 x float>, ptr [[RES_I1_FCA_1_GEP]], align 4 +; POSTPROCESS-CPS-NEXT: [[RES_I1_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_0_INSERT]], <3 x float> [[RES_I1_FCA_1_LOAD]], 1 +; POSTPROCESS-CPS-NEXT: [[RES_I1_FCA_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 2 +; POSTPROCESS-CPS-NEXT: [[RES_I1_FCA_2_LOAD:%.*]] = load float, ptr [[RES_I1_FCA_2_GEP]], align 4 +; POSTPROCESS-CPS-NEXT: [[RES_I1_FCA_2_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_1_INSERT]], float [[RES_I1_FCA_2_LOAD]], 2 +; POSTPROCESS-CPS-NEXT: [[RES_I1_FCA_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 3 +; POSTPROCESS-CPS-NEXT: [[RES_I1_FCA_3_LOAD:%.*]] = load i32, ptr [[RES_I1_FCA_3_GEP]], align 4 +; POSTPROCESS-CPS-NEXT: [[RES_I1_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_2_INSERT]], i32 [[RES_I1_FCA_3_LOAD]], 3 +; POSTPROCESS-CPS-NEXT: [[RES_I1_FCA_3_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_3_INSERT]], 0 +; POSTPROCESS-CPS-NEXT: [[RES_I1_FCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_3_INSERT]], 1 +; POSTPROCESS-CPS-NEXT: [[RES_I1_FCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_3_INSERT]], 2 +; POSTPROCESS-CPS-NEXT: [[RES_I1_FCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_3_INSERT]], 3 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0256_0_VEC_EXTRACT:%.*]] = extractelement <3 x float> 
[[RES_I1_FCA_3_INSERT_FCA_0_EXTRACT]], i32 0 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0256_4_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I1_FCA_3_INSERT_FCA_0_EXTRACT]], i32 1 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0256_8_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I1_FCA_3_INSERT_FCA_0_EXTRACT]], i32 2 +; POSTPROCESS-CPS-NEXT: [[VAL_0_I7:%.*]] = insertelement <3 x float> undef, float [[DOTSROA_0256_0_VEC_EXTRACT]], i32 0 +; POSTPROCESS-CPS-NEXT: [[VAL_1_I8:%.*]] = insertelement <3 x float> [[VAL_0_I7]], float [[DOTSROA_0256_4_VEC_EXTRACT]], i32 1 +; POSTPROCESS-CPS-NEXT: [[VAL_2_I9:%.*]] = insertelement <3 x float> [[VAL_1_I8]], float [[DOTSROA_0256_8_VEC_EXTRACT]], i32 2 +; POSTPROCESS-CPS-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x float> [[VAL_2_I9]], i8 0 +; POSTPROCESS-CPS-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_0_LOAD:%.*]] = load <3 x float>, ptr [[RES_I_FCA_0_GEP]], align 4 +; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] poison, <3 x float> [[RES_I_FCA_0_LOAD]], 0 +; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1 +; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_1_LOAD:%.*]] = load <3 x float>, ptr [[RES_I_FCA_1_GEP]], align 4 +; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_0_INSERT]], <3 x float> [[RES_I_FCA_1_LOAD]], 1 +; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 2 +; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_2_LOAD:%.*]] = load float, ptr [[RES_I_FCA_2_GEP]], align 4 +; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_2_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] 
[[RES_I_FCA_1_INSERT]], float [[RES_I_FCA_2_LOAD]], 2 +; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 3 +; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_3_LOAD:%.*]] = load i32, ptr [[RES_I_FCA_3_GEP]], align 4 +; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_2_INSERT]], i32 [[RES_I_FCA_3_LOAD]], 3 +; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 0 +; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 1 +; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 2 +; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 3 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_1_12_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I_FCA_3_INSERT_FCA_1_EXTRACT]], i32 0 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_1_16_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I_FCA_3_INSERT_FCA_1_EXTRACT]], i32 1 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_1_20_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I_FCA_3_INSERT_FCA_1_EXTRACT]], i32 2 +; POSTPROCESS-CPS-NEXT: [[VAL_0_I:%.*]] = insertelement <3 x float> undef, float [[DOTSROA_1_12_VEC_EXTRACT]], i32 0 +; POSTPROCESS-CPS-NEXT: [[VAL_1_I:%.*]] = insertelement <3 x float> [[VAL_0_I]], float [[DOTSROA_1_16_VEC_EXTRACT]], i32 1 +; POSTPROCESS-CPS-NEXT: [[VAL_2_I:%.*]] = insertelement <3 x float> [[VAL_1_I]], float [[DOTSROA_1_20_VEC_EXTRACT]], i32 2 +; POSTPROCESS-CPS-NEXT: [[EXTRACT:%.*]] = extractelement <3 x float> [[VAL_2_I]], i8 0 +; POSTPROCESS-CPS-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: [[RES_I10_FCA_0_GEP:%.*]] = getelementptr inbounds 
[[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: [[RES_I10_FCA_0_LOAD:%.*]] = load <3 x float>, ptr [[RES_I10_FCA_0_GEP]], align 4 +; POSTPROCESS-CPS-NEXT: [[RES_I10_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] poison, <3 x float> [[RES_I10_FCA_0_LOAD]], 0 +; POSTPROCESS-CPS-NEXT: [[RES_I10_FCA_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1 +; POSTPROCESS-CPS-NEXT: [[RES_I10_FCA_1_LOAD:%.*]] = load <3 x float>, ptr [[RES_I10_FCA_1_GEP]], align 4 +; POSTPROCESS-CPS-NEXT: [[RES_I10_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I10_FCA_0_INSERT]], <3 x float> [[RES_I10_FCA_1_LOAD]], 1 +; POSTPROCESS-CPS-NEXT: [[RES_I10_FCA_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 2 +; POSTPROCESS-CPS-NEXT: [[RES_I10_FCA_2_LOAD:%.*]] = load float, ptr [[RES_I10_FCA_2_GEP]], align 4 +; POSTPROCESS-CPS-NEXT: [[RES_I10_FCA_2_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I10_FCA_1_INSERT]], float [[RES_I10_FCA_2_LOAD]], 2 +; POSTPROCESS-CPS-NEXT: [[RES_I10_FCA_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 3 +; POSTPROCESS-CPS-NEXT: [[RES_I10_FCA_3_LOAD:%.*]] = load i32, ptr [[RES_I10_FCA_3_GEP]], align 4 +; POSTPROCESS-CPS-NEXT: [[RES_I10_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I10_FCA_2_INSERT]], i32 [[RES_I10_FCA_3_LOAD]], 3 +; POSTPROCESS-CPS-NEXT: [[RES_I10_FCA_3_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I10_FCA_3_INSERT]], 0 +; POSTPROCESS-CPS-NEXT: [[RES_I10_FCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I10_FCA_3_INSERT]], 1 +; POSTPROCESS-CPS-NEXT: [[RES_I10_FCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I10_FCA_3_INSERT]], 2 +; POSTPROCESS-CPS-NEXT: [[RES_I10_FCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I10_FCA_3_INSERT]], 3 +; 
POSTPROCESS-CPS-NEXT: [[TMP9:%.*]] = fmul fast float [[RES_I10_FCA_3_INSERT_FCA_2_EXTRACT]], [[EXTRACT]] +; POSTPROCESS-CPS-NEXT: [[TMP10:%.*]] = fadd fast float [[TMP9]], [[EXTRACT1]] +; POSTPROCESS-CPS-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP10]], 0.000000e+00 +; POSTPROCESS-CPS-NEXT: [[TMP12:%.*]] = fcmp fast ogt float [[TMP10]], 1.000000e+00 +; POSTPROCESS-CPS-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP10]], -1.000000e+00 +; POSTPROCESS-CPS-NEXT: br i1 [[TMP11]], label [[TMP14:%.*]], label [[TMP41:%.*]] +; POSTPROCESS-CPS: 14: +; POSTPROCESS-CPS-NEXT: br i1 [[TMP12]], label [[TMP15:%.*]], label [[TMP28:%.*]] +; POSTPROCESS-CPS: 15: +; POSTPROCESS-CPS-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP16]]) +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> undef, i32 0 +; POSTPROCESS-CPS-NEXT: [[TMP17:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP17]], ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> undef, i32 1 +; POSTPROCESS-CPS-NEXT: [[TMP18:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP18]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> undef, i32 2 +; POSTPROCESS-CPS-NEXT: [[TMP19:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP19]], ptr addrspace(20) addrspacecast (ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> undef, i32 3 +; POSTPROCESS-CPS-NEXT: [[TMP20:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP20]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 +; POSTPROCESS-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 +; POSTPROCESS-CPS-NEXT: [[TMP21:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT]] to i32 +; POSTPROCESS-CPS-NEXT: [[TMP22:%.*]] = bitcast i32 [[TMP21]] to float +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0237_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP22]], i32 0 +; POSTPROCESS-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 +; POSTPROCESS-CPS-NEXT: [[TMP23:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT]] to i32 +; POSTPROCESS-CPS-NEXT: [[TMP24:%.*]] = bitcast i32 [[TMP23]] to float +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0237_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0237_0_VEC_INSERT]], float [[TMP24]], i32 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0237_4_VEC_INSERT]], 0 +; POSTPROCESS-CPS-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT55:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT]], 0 +; 
POSTPROCESS-CPS-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP25]], i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT55]], ptr [[DOTFCA_0_GEP]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP56:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_LOAD:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP56]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD]], 0, 0, 0, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_GEP57:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP57]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_LOAD]], 0, 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_GEP58:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP58]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_LOAD]], 0, 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_GEP59:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_LOAD:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP59]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_LOAD]], 0, 1, 2 +; 
POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_GEP60:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_LOAD:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP60]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_LOAD]], 0, 1, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_GEP61:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP61]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_LOAD]], 0, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_GEP62:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP62]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_LOAD]], 0, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_GEP63:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_LOAD:%.*]] = load float, ptr [[DOTFCA_0_4_GEP63]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], float [[DOTFCA_0_4_LOAD]], 0, 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_GEP64:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_LOAD:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP64]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] 
[[DOTFCA_0_4_INSERT]], i64 [[DOTFCA_0_5_LOAD]], 0, 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_GEP65:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP65]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_LOAD]], 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_GEP66:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_1_1_GEP66]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_LOAD]], 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_GEP67:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_LOAD:%.*]] = load float, ptr [[DOTFCA_1_2_GEP67]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[DOTFCA_1_2_LOAD]], 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_GEP68:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_LOAD:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP68]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[DOTFCA_1_3_LOAD]], 1, 3 +; POSTPROCESS-CPS-NEXT: [[TMP26:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP27:%.*]] = add i32 [[TMP26]], 0 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP27]], ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 18, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]]) +; POSTPROCESS-CPS-NEXT: unreachable +; POSTPROCESS-CPS: 28: +; POSTPROCESS-CPS-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP29]]) +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT24:%.*]] = extractelement <4 x float> undef, i32 0 +; POSTPROCESS-CPS-NEXT: [[TMP30:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT24]] to i32 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP30]], ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT32:%.*]] = extractelement <4 x float> undef, i32 1 +; POSTPROCESS-CPS-NEXT: [[TMP31:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT32]] to i32 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP31]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT40:%.*]] = extractelement <4 x float> undef, i32 2 +; POSTPROCESS-CPS-NEXT: [[TMP32:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT40]] to i32 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP32]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT48:%.*]] = extractelement <4 x float> undef, i32 3 +; POSTPROCESS-CPS-NEXT: [[TMP33:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT48]] to i32 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP33]], ptr addrspace(20) addrspacecast (ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 +; POSTPROCESS-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT13:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 +; POSTPROCESS-CPS-NEXT: [[TMP34:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT13]] to i32 +; POSTPROCESS-CPS-NEXT: [[TMP35:%.*]] = bitcast i32 [[TMP34]] to float +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0241_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP35]], i32 0 +; POSTPROCESS-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT17:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 +; POSTPROCESS-CPS-NEXT: [[TMP36:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT17]] to i32 +; POSTPROCESS-CPS-NEXT: [[TMP37:%.*]] = bitcast i32 [[TMP36]] to float +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0241_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0241_0_VEC_INSERT]], float [[TMP37]], i32 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT240:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0241_4_VEC_INSERT]], 0 +; POSTPROCESS-CPS-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT69:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT240]], 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_GEP70:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP38]], i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT69]], ptr [[DOTFCA_0_GEP70]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP71:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_LOAD72:%.*]] = load <3 
x i32>, ptr [[DOTFCA_0_0_0_0_GEP71]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT73:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD72]], 0, 0, 0, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_GEP74:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_LOAD75:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP74]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_INSERT76:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT73]], <3 x float> [[DOTFCA_0_1_0_LOAD75]], 0, 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_GEP77:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_LOAD78:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP77]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_INSERT79:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT76]], <3 x float> [[DOTFCA_0_1_1_LOAD78]], 0, 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_GEP80:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_LOAD81:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP80]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_INSERT82:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT79]], float [[DOTFCA_0_1_2_LOAD81]], 0, 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_GEP83:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_LOAD84:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP83]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_INSERT85:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT82]], i32 [[DOTFCA_0_1_3_LOAD84]], 0, 1, 3 +; POSTPROCESS-CPS-NEXT: 
[[DOTFCA_0_2_GEP86:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_LOAD87:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP86]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_INSERT88:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT85]], <3 x float> [[DOTFCA_0_2_LOAD87]], 0, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_GEP89:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_LOAD90:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP89]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_INSERT91:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT88]], <3 x float> [[DOTFCA_0_3_LOAD90]], 0, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_GEP92:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_LOAD93:%.*]] = load float, ptr [[DOTFCA_0_4_GEP92]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_INSERT94:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT91]], float [[DOTFCA_0_4_LOAD93]], 0, 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_GEP95:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_LOAD96:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP95]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_INSERT97:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT94]], i64 [[DOTFCA_0_5_LOAD96]], 0, 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_GEP98:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_LOAD99:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP98]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_INSERT100:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] 
[[DOTFCA_0_5_INSERT97]], <3 x float> [[DOTFCA_1_0_LOAD99]], 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_GEP101:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_LOAD102:%.*]] = load <3 x float>, ptr [[DOTFCA_1_1_GEP101]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_INSERT103:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT100]], <3 x float> [[DOTFCA_1_1_LOAD102]], 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_GEP104:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_LOAD105:%.*]] = load float, ptr [[DOTFCA_1_2_GEP104]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_INSERT106:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT103]], float [[DOTFCA_1_2_LOAD105]], 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_GEP107:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_LOAD108:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP107]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_INSERT109:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT106]], i32 [[DOTFCA_1_3_LOAD108]], 1, 3 +; POSTPROCESS-CPS-NEXT: [[TMP39:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP40:%.*]] = add i32 [[TMP39]], 0 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP40]], ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 18, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT109]]) +; POSTPROCESS-CPS-NEXT: unreachable +; POSTPROCESS-CPS: 41: +; POSTPROCESS-CPS-NEXT: br i1 [[TMP13]], label [[TMP42:%.*]], label [[TMP65:%.*]] +; POSTPROCESS-CPS: 42: +; POSTPROCESS-CPS-NEXT: br i1 [[TMP12]], label [[TMP43:%.*]], label [[TMP54:%.*]] +; POSTPROCESS-CPS: 43: +; POSTPROCESS-CPS-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: call void @_cont_IgnoreHit(ptr [[TMP44]]) +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT26:%.*]] = extractelement <4 x float> undef, i32 0 +; POSTPROCESS-CPS-NEXT: [[TMP45:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT26]] to i32 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP45]], ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT34:%.*]] = extractelement <4 x float> undef, i32 1 +; POSTPROCESS-CPS-NEXT: [[TMP46:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT34]] to i32 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP46]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT42:%.*]] = extractelement <4 x float> undef, i32 2 +; POSTPROCESS-CPS-NEXT: [[TMP47:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT42]] to i32 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP47]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT50:%.*]] = extractelement <4 x float> undef, i32 3 +; POSTPROCESS-CPS-NEXT: [[TMP48:%.*]] = bitcast float 
[[DOTSROA_0_12_VEC_EXTRACT50]] to i32 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP48]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 +; POSTPROCESS-CPS-NEXT: [[TMP49:%.*]] = bitcast i32 [[TMP4]] to float +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0245_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP49]], i32 0 +; POSTPROCESS-CPS-NEXT: [[TMP50:%.*]] = bitcast i32 [[TMP5]] to float +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0245_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0245_0_VEC_INSERT]], float [[TMP50]], i32 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT244:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0245_4_VEC_INSERT]], 0 +; POSTPROCESS-CPS-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT110:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT244]], 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_GEP111:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP51]], i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT110]], ptr [[DOTFCA_0_GEP111]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP112:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_LOAD113:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP112]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT114:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD113]], 0, 0, 0, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_GEP115:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 
0, i32 0, i32 1, i32 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_LOAD116:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP115]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_INSERT117:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT114]], <3 x float> [[DOTFCA_0_1_0_LOAD116]], 0, 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_GEP118:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_LOAD119:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP118]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_INSERT120:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT117]], <3 x float> [[DOTFCA_0_1_1_LOAD119]], 0, 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_GEP121:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_LOAD122:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP121]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_INSERT123:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT120]], float [[DOTFCA_0_1_2_LOAD122]], 0, 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_GEP124:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_LOAD125:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP124]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_INSERT126:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT123]], i32 [[DOTFCA_0_1_3_LOAD125]], 0, 1, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_GEP127:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_LOAD128:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP127]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_INSERT129:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] 
[[DOTFCA_0_1_3_INSERT126]], <3 x float> [[DOTFCA_0_2_LOAD128]], 0, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_GEP130:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_LOAD131:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP130]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_INSERT132:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT129]], <3 x float> [[DOTFCA_0_3_LOAD131]], 0, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_GEP133:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_LOAD134:%.*]] = load float, ptr [[DOTFCA_0_4_GEP133]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_INSERT135:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT132]], float [[DOTFCA_0_4_LOAD134]], 0, 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_GEP136:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_LOAD137:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP136]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_INSERT138:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT135]], i64 [[DOTFCA_0_5_LOAD137]], 0, 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_GEP139:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_LOAD140:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP139]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_INSERT141:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT138]], <3 x float> [[DOTFCA_1_0_LOAD140]], 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_GEP142:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_LOAD143:%.*]] = load <3 x float>, ptr 
[[DOTFCA_1_1_GEP142]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_INSERT144:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT141]], <3 x float> [[DOTFCA_1_1_LOAD143]], 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_GEP145:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_LOAD146:%.*]] = load float, ptr [[DOTFCA_1_2_GEP145]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_INSERT147:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT144]], float [[DOTFCA_1_2_LOAD146]], 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_GEP148:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_LOAD149:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP148]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_INSERT150:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT147]], i32 [[DOTFCA_1_3_LOAD149]], 1, 3 +; POSTPROCESS-CPS-NEXT: [[TMP52:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP53:%.*]] = add i32 [[TMP52]], 0 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP53]], ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 18, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT150]]) +; POSTPROCESS-CPS-NEXT: unreachable +; POSTPROCESS-CPS: 54: +; POSTPROCESS-CPS-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: call void @_cont_IgnoreHit(ptr [[TMP55]]) +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT28:%.*]] = extractelement <4 x float> undef, i32 0 +; POSTPROCESS-CPS-NEXT: [[TMP56:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT28]] to i32 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP56]], ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT36:%.*]] = extractelement <4 x float> undef, i32 1 +; POSTPROCESS-CPS-NEXT: [[TMP57:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT36]] to i32 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP57]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT44:%.*]] = extractelement <4 x float> undef, i32 2 +; POSTPROCESS-CPS-NEXT: [[TMP58:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT44]] to i32 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP58]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT52:%.*]] = extractelement <4 x float> undef, i32 3 +; POSTPROCESS-CPS-NEXT: [[TMP59:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT52]] to i32 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP59]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr 
addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 +; POSTPROCESS-CPS-NEXT: [[TMP60:%.*]] = bitcast i32 [[TMP4]] to float +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0249_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP60]], i32 0 +; POSTPROCESS-CPS-NEXT: [[TMP61:%.*]] = bitcast i32 [[TMP5]] to float +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0249_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0249_0_VEC_INSERT]], float [[TMP61]], i32 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT248:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0249_4_VEC_INSERT]], 0 +; POSTPROCESS-CPS-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT151:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT248]], 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_GEP152:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP62]], i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT151]], ptr [[DOTFCA_0_GEP152]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP153:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_LOAD154:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP153]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT155:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD154]], 0, 0, 0, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_GEP156:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_LOAD157:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP156]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_INSERT158:%.*]] = insertvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT155]], <3 x float> [[DOTFCA_0_1_0_LOAD157]], 0, 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_GEP159:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_LOAD160:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP159]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_INSERT161:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT158]], <3 x float> [[DOTFCA_0_1_1_LOAD160]], 0, 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_GEP162:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_LOAD163:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP162]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_INSERT164:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT161]], float [[DOTFCA_0_1_2_LOAD163]], 0, 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_GEP165:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_LOAD166:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP165]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_INSERT167:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT164]], i32 [[DOTFCA_0_1_3_LOAD166]], 0, 1, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_GEP168:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_LOAD169:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP168]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_INSERT170:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT167]], <3 x float> [[DOTFCA_0_2_LOAD169]], 0, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_GEP171:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], 
i32 0, i32 0, i32 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_LOAD172:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP171]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_INSERT173:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT170]], <3 x float> [[DOTFCA_0_3_LOAD172]], 0, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_GEP174:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_LOAD175:%.*]] = load float, ptr [[DOTFCA_0_4_GEP174]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_INSERT176:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT173]], float [[DOTFCA_0_4_LOAD175]], 0, 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_GEP177:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_LOAD178:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP177]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_INSERT179:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT176]], i64 [[DOTFCA_0_5_LOAD178]], 0, 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_GEP180:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_LOAD181:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP180]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_INSERT182:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT179]], <3 x float> [[DOTFCA_1_0_LOAD181]], 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_GEP183:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_LOAD184:%.*]] = load <3 x float>, ptr [[DOTFCA_1_1_GEP183]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_INSERT185:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT182]], <3 x float> [[DOTFCA_1_1_LOAD184]], 1, 1 +; 
POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_GEP186:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_LOAD187:%.*]] = load float, ptr [[DOTFCA_1_2_GEP186]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_INSERT188:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT185]], float [[DOTFCA_1_2_LOAD187]], 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_GEP189:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_LOAD190:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP189]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_INSERT191:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT188]], i32 [[DOTFCA_1_3_LOAD190]], 1, 3 +; POSTPROCESS-CPS-NEXT: [[TMP63:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP64:%.*]] = add i32 [[TMP63]], 0 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP64]], ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 18, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT191]]) +; POSTPROCESS-CPS-NEXT: unreachable +; POSTPROCESS-CPS: 65: +; POSTPROCESS-CPS-NEXT: call void @_cont_AcceptHit(ptr [[SYSTEM_DATA_ALLOCA]]) +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT30:%.*]] = extractelement <4 x float> undef, i32 0 +; POSTPROCESS-CPS-NEXT: [[TMP66:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT30]] to i32 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP66]], ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT38:%.*]] = extractelement <4 x float> undef, i32 1 +; POSTPROCESS-CPS-NEXT: [[TMP67:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT38]] to i32 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP67]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT46:%.*]] = extractelement <4 x float> undef, i32 2 +; POSTPROCESS-CPS-NEXT: [[TMP68:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT46]] to i32 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP68]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT54:%.*]] = extractelement <4 x float> undef, i32 3 +; POSTPROCESS-CPS-NEXT: [[TMP69:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT54]] to i32 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP69]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to 
ptr addrspace(20)), align 4 +; POSTPROCESS-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT15:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 +; POSTPROCESS-CPS-NEXT: [[TMP70:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT15]] to i32 +; POSTPROCESS-CPS-NEXT: [[TMP71:%.*]] = bitcast i32 [[TMP70]] to float +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0253_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP71]], i32 0 +; POSTPROCESS-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT19:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 +; POSTPROCESS-CPS-NEXT: [[TMP72:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT19]] to i32 +; POSTPROCESS-CPS-NEXT: [[TMP73:%.*]] = bitcast i32 [[TMP72]] to float +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0253_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0253_0_VEC_INSERT]], float [[TMP73]], i32 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT252:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0253_4_VEC_INSERT]], 0 +; POSTPROCESS-CPS-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT192:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT252]], 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_GEP193:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP74]], i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT192]], ptr [[DOTFCA_0_GEP193]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP194:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_LOAD195:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP194]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT196:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, 
<3 x i32> [[DOTFCA_0_0_0_0_LOAD195]], 0, 0, 0, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_GEP197:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_LOAD198:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP197]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_INSERT199:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT196]], <3 x float> [[DOTFCA_0_1_0_LOAD198]], 0, 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_GEP200:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_LOAD201:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP200]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_INSERT202:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT199]], <3 x float> [[DOTFCA_0_1_1_LOAD201]], 0, 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_GEP203:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_LOAD204:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP203]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_INSERT205:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT202]], float [[DOTFCA_0_1_2_LOAD204]], 0, 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_GEP206:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_LOAD207:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP206]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_INSERT208:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT205]], i32 [[DOTFCA_0_1_3_LOAD207]], 0, 1, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_GEP209:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 +; 
POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_LOAD210:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP209]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_INSERT211:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT208]], <3 x float> [[DOTFCA_0_2_LOAD210]], 0, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_GEP212:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_LOAD213:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP212]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_INSERT214:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT211]], <3 x float> [[DOTFCA_0_3_LOAD213]], 0, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_GEP215:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_LOAD216:%.*]] = load float, ptr [[DOTFCA_0_4_GEP215]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_INSERT217:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT214]], float [[DOTFCA_0_4_LOAD216]], 0, 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_GEP218:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_LOAD219:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP218]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_INSERT220:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT217]], i64 [[DOTFCA_0_5_LOAD219]], 0, 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_GEP221:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_LOAD222:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP221]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_INSERT223:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT220]], <3 x float> [[DOTFCA_1_0_LOAD222]], 1, 0 +; POSTPROCESS-CPS-NEXT: 
[[DOTFCA_1_1_GEP224:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_LOAD225:%.*]] = load <3 x float>, ptr [[DOTFCA_1_1_GEP224]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_INSERT226:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT223]], <3 x float> [[DOTFCA_1_1_LOAD225]], 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_GEP227:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_LOAD228:%.*]] = load float, ptr [[DOTFCA_1_2_GEP227]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_INSERT229:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT226]], float [[DOTFCA_1_2_LOAD228]], 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_GEP230:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_LOAD231:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP230]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_INSERT232:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT229]], i32 [[DOTFCA_1_3_LOAD231]], 1, 3 +; POSTPROCESS-CPS-NEXT: [[TMP75:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP76:%.*]] = add i32 [[TMP75]], 0 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP76]], ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 18, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT232]]) +; POSTPROCESS-CPS-NEXT: unreachable +; +; +; POSTPROCESS-CPS-LABEL: define void @MyIntersectionShader( +; POSTPROCESS-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[CSPINIT:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META37]] !lgc.cps [[META41:![0-9]+]] !continuation [[META42:![0-9]+]] { +; POSTPROCESS-CPS-NEXT: AllocaSpillBB: +; POSTPROCESS-CPS-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; POSTPROCESS-CPS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 8 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP2]], ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP3:%.*]] = inttoptr i32 [[TMP1]] to ptr addrspace(21) +; POSTPROCESS-CPS-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP3]], i32 0 +; POSTPROCESS-CPS-NEXT: store i32 [[RETURN_ADDR]], ptr addrspace(21) [[TMP4]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 0, 0, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 3 +; POSTPROCESS-CPS-NEXT: 
[[DOTFCA_0_4_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 3 +; POSTPROCESS-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) +; POSTPROCESS-CPS-NEXT: [[TMP5:%.*]] = bitcast <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]] to <3 x float> +; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA:%.*]] poison, <3 x float> [[TMP5]], 0 +; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 1 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_13_24_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[DOTFCA_0_1_1_EXTRACT]], i32 0 +; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_2_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_1_INSERT]], float [[SYSTEM_DATA_ALLOCA_SROA_13_24_VEC_EXTRACT]], 2 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_13_28_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[DOTFCA_0_1_1_EXTRACT]], i32 1 +; POSTPROCESS-CPS-NEXT: [[TMP6:%.*]] = bitcast float [[SYSTEM_DATA_ALLOCA_SROA_13_28_VEC_EXTRACT]] to i32 +; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_2_INSERT]], i32 [[TMP6]], 3 +; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 0 +; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] 
[[RES_I_FCA_3_INSERT]], 1 +; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 2 +; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 3 +; POSTPROCESS-CPS-NEXT: [[ISNOHIT_I:%.*]] = fcmp fast uge float [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT]], [[DOTFCA_0_4_EXTRACT]] +; POSTPROCESS-CPS-NEXT: br i1 [[ISNOHIT_I]], label [[ISEND_I:%.*]], label [[CALLAHIT_I:%.*]] +; POSTPROCESS-CPS: callAHit.i: +; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]], 0, 0, 0, 0 +; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 +; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 +; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 +; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 +; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 +; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 +; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_3_INSERT]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 +; POSTPROCESS-CPS-NEXT: 
[[TRAV_DATA_I_FCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_4_INSERT]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 +; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 +; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 +; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 +; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> undef, 0 +; POSTPROCESS-CPS-NEXT: [[TMP7:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference(ptr @MyIntersectionShader.resume.0) +; POSTPROCESS-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 3, i32 8, {} poison, i32 [[TMP7]], i32 5, float [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT]]), !continuation.returnedRegistercount !32, !continuation.registercount [[META32:![0-9]+]] +; POSTPROCESS-CPS-NEXT: unreachable +; POSTPROCESS-CPS: isEnd.i: +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 0 +; POSTPROCESS-CPS-NEXT: [[TMP8:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; POSTPROCESS-CPS-NEXT: [[TMP9:%.*]] = bitcast i32 [[TMP8]] to float +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0107_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP9]], i32 0 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 1 +; POSTPROCESS-CPS-NEXT: [[TMP10:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; POSTPROCESS-CPS-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP10]] to float +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0107_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0107_0_VEC_INSERT]], float [[TMP11]], i32 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT106:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0107_4_VEC_INSERT]], 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT106]], 0 +; POSTPROCESS-CPS-NEXT: [[TMP12:%.*]] = bitcast <2 x float> [[DOTFCA_0_EXTRACT]] to <2 x i32> +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_0_0_VEC_EXPAND:%.*]] = shufflevector <2 x i32> [[TMP12]], <2 x i32> poison, <3 x i32> +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND:%.*]] = select <3 x i1> , <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VEC_EXPAND]], <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]] +; POSTPROCESS-CPS-NEXT: [[ISEND_I1:%.*]] = call i1 @opaqueIsEnd() +; POSTPROCESS-CPS-NEXT: br i1 [[ISEND_I1]], label [[TMP13:%.*]], label [[TMP16:%.*]] +; 
POSTPROCESS-CPS: 13: +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT44:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND]], 0, 0, 0, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_INSERT47:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT44]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_INSERT50:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT47]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_INSERT53:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT50]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_INSERT56:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT53]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_INSERT59:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT56]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_INSERT62:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT59]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_INSERT65:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT62]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_INSERT68:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT65]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_INSERT71:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT68]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_INSERT74:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT71]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_INSERT77:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT74]], float [[DOTFCA_1_2_EXTRACT]], 
1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_INSERT80:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT77]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 +; POSTPROCESS-CPS-NEXT: [[TMP14:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], -8 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP15]], ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT80]]), !continuation.registercount [[META32]] +; POSTPROCESS-CPS-NEXT: unreachable +; POSTPROCESS-CPS: 16: +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND]], 0, 0, 0, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 +; POSTPROCESS-CPS-NEXT: 
[[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 +; POSTPROCESS-CPS-NEXT: [[TMP17:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP18:%.*]] = add i32 [[TMP17]], -8 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP18]], ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]]) +; POSTPROCESS-CPS-NEXT: unreachable +; +; +; POSTPROCESS-CPS-LABEL: define dso_local void @MyIntersectionShader.resume.0( +; POSTPROCESS-CPS-SAME: {} [[TMP0:%.*]], i32 [[CSPINIT:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP3:%.*]]) !lgc.rt.shaderstage [[META37]] !lgc.cps [[META41]] !continuation [[META42]] { +; POSTPROCESS-CPS-NEXT: entryresume.0: +; POSTPROCESS-CPS-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; POSTPROCESS-CPS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP4:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], -8 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT16:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 0, 0, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_EXTRACT18:%.*]] = extractvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_EXTRACT20:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_EXTRACT22:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_EXTRACT24:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 1, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_EXTRACT26:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_EXTRACT28:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_EXTRACT30:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_EXTRACT32:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_EXTRACT34:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_EXTRACT36:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_EXTRACT38:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_EXTRACT40:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 1, 3 +; POSTPROCESS-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; POSTPROCESS-CPS-NEXT: [[ISEND_I1:%.*]] = call i1 @opaqueIsEnd() +; POSTPROCESS-CPS-NEXT: br i1 [[ISEND_I1]], label [[TMP6:%.*]], label [[TMP11:%.*]] +; POSTPROCESS-CPS: 6: +; POSTPROCESS-CPS-NEXT: [[TMP7:%.*]] = inttoptr i32 [[TMP5]] to ptr addrspace(21) +; POSTPROCESS-CPS-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP7]], i32 0 +; POSTPROCESS-CPS-NEXT: [[RETURN_ADDR_RELOAD2:%.*]] = load i32, ptr addrspace(21) [[TMP8]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT44:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT16]], 
0, 0, 0, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_INSERT47:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT44]], <3 x float> [[DOTFCA_0_1_0_EXTRACT18]], 0, 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_INSERT50:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT47]], <3 x float> [[DOTFCA_0_1_1_EXTRACT20]], 0, 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_INSERT53:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT50]], float [[DOTFCA_0_1_2_EXTRACT22]], 0, 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_INSERT56:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT53]], i32 [[DOTFCA_0_1_3_EXTRACT24]], 0, 1, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_INSERT59:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT56]], <3 x float> [[DOTFCA_0_2_EXTRACT26]], 0, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_INSERT62:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT59]], <3 x float> [[DOTFCA_0_3_EXTRACT28]], 0, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_INSERT65:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT62]], float [[DOTFCA_0_4_EXTRACT30]], 0, 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_INSERT68:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT65]], i64 [[DOTFCA_0_5_EXTRACT32]], 0, 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_INSERT71:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT68]], <3 x float> [[DOTFCA_1_0_EXTRACT34]], 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_INSERT74:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT71]], <3 x float> [[DOTFCA_1_1_EXTRACT36]], 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_INSERT77:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT74]], float [[DOTFCA_1_2_EXTRACT38]], 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_INSERT80:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT77]], i32 [[DOTFCA_1_3_EXTRACT40]], 1, 
3 +; POSTPROCESS-CPS-NEXT: [[TMP9:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], -8 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP10]], ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR_RELOAD2]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT80]]), !continuation.registercount [[META32]] +; POSTPROCESS-CPS-NEXT: unreachable +; POSTPROCESS-CPS: 11: +; POSTPROCESS-CPS-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP5]] to ptr addrspace(21) +; POSTPROCESS-CPS-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP12]], i32 0 +; POSTPROCESS-CPS-NEXT: [[RETURN_ADDR_RELOAD:%.*]] = load i32, ptr addrspace(21) [[TMP13]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT16]], 0, 0, 0, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT18]], 0, 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_EXTRACT20]], 0, 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_EXTRACT22]], 0, 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_EXTRACT24]], 0, 1, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_EXTRACT26]], 0, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_EXTRACT28]], 0, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], float [[DOTFCA_0_4_EXTRACT30]], 0, 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 [[DOTFCA_0_5_EXTRACT32]], 0, 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_EXTRACT34]], 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT36]], 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT38]], 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT40]], 1, 3 +; POSTPROCESS-CPS-NEXT: [[TMP14:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], -8 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP15]], ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR_RELOAD]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]]) +; POSTPROCESS-CPS-NEXT: unreachable +; +; +; POSTPROCESS-CPS-LABEL: define void @MyIntersectionShader2( +; POSTPROCESS-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[CSPINIT:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META37]] !lgc.cps [[META41]] !continuation [[META43:![0-9]+]] { +; POSTPROCESS-CPS-NEXT: AllocaSpillBB: +; POSTPROCESS-CPS-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; POSTPROCESS-CPS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 8 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP2]], ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP3:%.*]] = inttoptr i32 [[TMP1]] to ptr addrspace(21) +; POSTPROCESS-CPS-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP3]], i32 0 +; POSTPROCESS-CPS-NEXT: store i32 [[RETURN_ADDR]], ptr addrspace(21) [[TMP4]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 0, 0, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 3 +; POSTPROCESS-CPS-NEXT: 
[[DOTFCA_0_4_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 3 +; POSTPROCESS-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) +; POSTPROCESS-CPS-NEXT: [[TMP5:%.*]] = bitcast <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]] to <3 x float> +; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA:%.*]] poison, <3 x float> [[TMP5]], 0 +; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 1 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_13_24_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[DOTFCA_0_1_1_EXTRACT]], i32 0 +; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_2_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_1_INSERT]], float [[SYSTEM_DATA_ALLOCA_SROA_13_24_VEC_EXTRACT]], 2 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_13_28_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[DOTFCA_0_1_1_EXTRACT]], i32 1 +; POSTPROCESS-CPS-NEXT: [[TMP6:%.*]] = bitcast float [[SYSTEM_DATA_ALLOCA_SROA_13_28_VEC_EXTRACT]] to i32 +; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_2_INSERT]], i32 [[TMP6]], 3 +; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 0 +; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] 
[[RES_I_FCA_3_INSERT]], 1 +; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 2 +; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 3 +; POSTPROCESS-CPS-NEXT: [[ISNOHIT_I:%.*]] = fcmp fast uge float [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT]], [[DOTFCA_0_4_EXTRACT]] +; POSTPROCESS-CPS-NEXT: br i1 [[ISNOHIT_I]], label [[ISEND_I:%.*]], label [[CALLAHIT_I:%.*]] +; POSTPROCESS-CPS: callAHit.i: +; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]], 0, 0, 0, 0 +; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 +; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 +; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 +; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 +; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 +; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 +; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_3_INSERT]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 +; POSTPROCESS-CPS-NEXT: 
[[TRAV_DATA_I_FCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_4_INSERT]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 +; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 +; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 +; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 +; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2:%.*]] poison, <2 x float> undef, 0 +; POSTPROCESS-CPS-NEXT: [[TMP7:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference(ptr @MyIntersectionShader2.resume.0) +; POSTPROCESS-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 3, i32 8, {} poison, i32 [[TMP7]], i32 5, float [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2]] [[DOTFCA_0_INSERT]]), !continuation.returnedRegistercount !32, !continuation.registercount [[META32]] +; POSTPROCESS-CPS-NEXT: unreachable +; POSTPROCESS-CPS: isEnd.i: +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 0 +; POSTPROCESS-CPS-NEXT: [[TMP8:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; POSTPROCESS-CPS-NEXT: [[TMP9:%.*]] = bitcast i32 [[TMP8]] to float +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0107_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP9]], i32 0 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 1 +; POSTPROCESS-CPS-NEXT: [[TMP10:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; POSTPROCESS-CPS-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP10]] to float +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0107_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0107_0_VEC_INSERT]], float [[TMP11]], i32 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT106:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> [[DOTSROA_0107_4_VEC_INSERT]], 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT106]], 0 +; POSTPROCESS-CPS-NEXT: [[TMP12:%.*]] = bitcast <2 x float> [[DOTFCA_0_EXTRACT]] to <2 x i32> +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_0_0_VEC_EXPAND:%.*]] = shufflevector <2 x i32> [[TMP12]], <2 x i32> poison, <3 x i32> +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND:%.*]] = select <3 x i1> , <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VEC_EXPAND]], <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]] +; POSTPROCESS-CPS-NEXT: [[ISEND_I1:%.*]] = call i1 @opaqueIsEnd() +; POSTPROCESS-CPS-NEXT: br i1 [[ISEND_I1]], label [[TMP13:%.*]], label [[TMP16:%.*]] +; 
POSTPROCESS-CPS: 13: +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT44:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND]], 0, 0, 0, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_INSERT47:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT44]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_INSERT50:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT47]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_INSERT53:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT50]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_INSERT56:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT53]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_INSERT59:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT56]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_INSERT62:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT59]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_INSERT65:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT62]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_INSERT68:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT65]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_INSERT71:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT68]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_INSERT74:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT71]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_INSERT77:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT74]], float [[DOTFCA_1_2_EXTRACT]], 
1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_INSERT80:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT77]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 +; POSTPROCESS-CPS-NEXT: [[TMP14:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], -8 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP15]], ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT80]]), !continuation.registercount [[META32]] +; POSTPROCESS-CPS-NEXT: unreachable +; POSTPROCESS-CPS: 16: +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND]], 0, 0, 0, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 +; POSTPROCESS-CPS-NEXT: 
[[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 +; POSTPROCESS-CPS-NEXT: [[TMP17:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP18:%.*]] = add i32 [[TMP17]], -8 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP18]], ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]]) +; POSTPROCESS-CPS-NEXT: unreachable +; +; +; POSTPROCESS-CPS-LABEL: define dso_local void @MyIntersectionShader2.resume.0( +; POSTPROCESS-CPS-SAME: {} [[TMP0:%.*]], i32 [[CSPINIT:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP3:%.*]]) !lgc.rt.shaderstage [[META37]] !lgc.cps [[META41]] !continuation [[META43]] { +; POSTPROCESS-CPS-NEXT: entryresume.0: +; POSTPROCESS-CPS-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; POSTPROCESS-CPS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP4:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], -8 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT16:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 0, 0, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_EXTRACT18:%.*]] = extractvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_EXTRACT20:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_EXTRACT22:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_EXTRACT24:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 1, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_EXTRACT26:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_EXTRACT28:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_EXTRACT30:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_EXTRACT32:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_EXTRACT34:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_EXTRACT36:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_EXTRACT38:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_EXTRACT40:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 1, 3 +; POSTPROCESS-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; POSTPROCESS-CPS-NEXT: [[ISEND_I1:%.*]] = call i1 @opaqueIsEnd() +; POSTPROCESS-CPS-NEXT: br i1 [[ISEND_I1]], label [[TMP6:%.*]], label [[TMP11:%.*]] +; POSTPROCESS-CPS: 6: +; POSTPROCESS-CPS-NEXT: [[TMP7:%.*]] = inttoptr i32 [[TMP5]] to ptr addrspace(21) +; POSTPROCESS-CPS-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP7]], i32 0 +; POSTPROCESS-CPS-NEXT: [[RETURN_ADDR_RELOAD2:%.*]] = load i32, ptr addrspace(21) [[TMP8]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT44:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT16]], 
0, 0, 0, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_INSERT47:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT44]], <3 x float> [[DOTFCA_0_1_0_EXTRACT18]], 0, 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_INSERT50:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT47]], <3 x float> [[DOTFCA_0_1_1_EXTRACT20]], 0, 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_INSERT53:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT50]], float [[DOTFCA_0_1_2_EXTRACT22]], 0, 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_INSERT56:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT53]], i32 [[DOTFCA_0_1_3_EXTRACT24]], 0, 1, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_INSERT59:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT56]], <3 x float> [[DOTFCA_0_2_EXTRACT26]], 0, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_INSERT62:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT59]], <3 x float> [[DOTFCA_0_3_EXTRACT28]], 0, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_INSERT65:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT62]], float [[DOTFCA_0_4_EXTRACT30]], 0, 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_INSERT68:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT65]], i64 [[DOTFCA_0_5_EXTRACT32]], 0, 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_INSERT71:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT68]], <3 x float> [[DOTFCA_1_0_EXTRACT34]], 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_INSERT74:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT71]], <3 x float> [[DOTFCA_1_1_EXTRACT36]], 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_INSERT77:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT74]], float [[DOTFCA_1_2_EXTRACT38]], 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_INSERT80:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT77]], i32 [[DOTFCA_1_3_EXTRACT40]], 1, 
3 +; POSTPROCESS-CPS-NEXT: [[TMP9:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], -8 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP10]], ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR_RELOAD2]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT80]]), !continuation.registercount [[META32]] +; POSTPROCESS-CPS-NEXT: unreachable +; POSTPROCESS-CPS: 11: +; POSTPROCESS-CPS-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP5]] to ptr addrspace(21) +; POSTPROCESS-CPS-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP12]], i32 0 +; POSTPROCESS-CPS-NEXT: [[RETURN_ADDR_RELOAD:%.*]] = load i32, ptr addrspace(21) [[TMP13]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT16]], 0, 0, 0, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT18]], 0, 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_EXTRACT20]], 0, 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_EXTRACT22]], 0, 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_EXTRACT24]], 0, 1, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_EXTRACT26]], 0, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_EXTRACT28]], 0, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], float [[DOTFCA_0_4_EXTRACT30]], 0, 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 [[DOTFCA_0_5_EXTRACT32]], 0, 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_EXTRACT34]], 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT36]], 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT38]], 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT40]], 1, 3 +; POSTPROCESS-CPS-NEXT: [[TMP14:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], -8 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP15]], ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR_RELOAD]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]]) +; POSTPROCESS-CPS-NEXT: unreachable +; +; +; POSTPROCESS-CPS-LABEL: define void @MyMissShader( +; POSTPROCESS-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[CSPINIT:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META41]] !lgc.cps [[META37]] !continuation [[META44:![0-9]+]] { +; POSTPROCESS-CPS-NEXT: AllocaSpillBB: +; POSTPROCESS-CPS-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; POSTPROCESS-CPS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 0, 0 +; POSTPROCESS-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 0 +; POSTPROCESS-CPS-NEXT: [[TMP1:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP1]], ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 1 +; POSTPROCESS-CPS-NEXT: [[TMP2:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP2]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 2 +; POSTPROCESS-CPS-NEXT: [[TMP3:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP3]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr 
addrspace(20)), align 4 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 3 +; POSTPROCESS-CPS-NEXT: [[TMP4:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP4]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison, <3 x i32> [[DOTFCA_0_0_EXTRACT]], 0 +; POSTPROCESS-CPS-NEXT: [[TMP5:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], 0 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP6]], ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 6, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]) +; POSTPROCESS-CPS-NEXT: unreachable +; +; +; SROA-CPS-LABEL: define i32 @_cont_GetContinuationStackAddr( +; SROA-CPS-SAME: ) #[[ATTR0:[0-9]+]] { +; SROA-CPS-NEXT: ret i32 0 +; +; +; SROA-CPS-LABEL: define %struct.HitData @_cont_GetCandidateState( +; SROA-CPS-SAME: ptr [[DATA:%.*]]) #[[ATTR0]] { +; SROA-CPS-NEXT: [[RESPTR:%.*]] = getelementptr [[STRUCT_ANYHITTRAVERSALDATA:%.*]], ptr [[DATA]], i32 0, i32 0 +; SROA-CPS-NEXT: [[RES:%.*]] = load [[STRUCT_HITDATA:%.*]], ptr [[RESPTR]], align 4 +; SROA-CPS-NEXT: ret [[STRUCT_HITDATA]] [[RES]] +; +; +; SROA-CPS-LABEL: define void @_cont_SetTriangleHitAttributes( +; SROA-CPS-SAME: ptr [[DATA:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[VAL:%.*]]) { +; SROA-CPS-NEXT: [[ADDR:%.*]] = getelementptr [[STRUCT_SYSTEMDATA:%.*]], ptr [[DATA]], i32 0, i32 0 +; SROA-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL]], ptr [[ADDR]], align 4 +; SROA-CPS-NEXT: ret void +; +; +; SROA-CPS-LABEL: define i32 @_cont_GetLocalRootIndex( +; 
SROA-CPS-SAME: ptr [[DATA:%.*]]) { +; SROA-CPS-NEXT: ret i32 5 +; +; +; SROA-CPS-LABEL: define i1 @_cont_IsEndSearch( +; SROA-CPS-SAME: ptr [[TMP0:%.*]]) #[[ATTR0]] { +; SROA-CPS-NEXT: [[ISEND:%.*]] = call i1 @opaqueIsEnd() +; SROA-CPS-NEXT: ret i1 [[ISEND]] +; +; +; SROA-CPS-LABEL: define <3 x i32> @_cont_DispatchRaysIndex3( +; SROA-CPS-SAME: ptr [[DATA:%.*]]) { +; SROA-CPS-NEXT: [[RESPTR_1:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[DATA]], i32 0, i32 0, i32 0 +; SROA-CPS-NEXT: [[RES_1:%.*]] = load i32, ptr [[RESPTR_1]], align 4 +; SROA-CPS-NEXT: [[RESPTR_2:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[DATA]], i32 0, i32 0, i32 1 +; SROA-CPS-NEXT: [[RES_2:%.*]] = load i32, ptr [[RESPTR_2]], align 4 +; SROA-CPS-NEXT: [[RESPTR_3:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[DATA]], i32 0, i32 0, i32 2 +; SROA-CPS-NEXT: [[RES_3:%.*]] = load i32, ptr [[RESPTR_3]], align 4 +; SROA-CPS-NEXT: [[VAL_0:%.*]] = insertelement <3 x i32> undef, i32 [[RES_1]], i32 0 +; SROA-CPS-NEXT: [[VAL_1:%.*]] = insertelement <3 x i32> [[VAL_0]], i32 [[RES_2]], i32 1 +; SROA-CPS-NEXT: [[VAL_2:%.*]] = insertelement <3 x i32> [[VAL_1]], i32 [[RES_3]], i32 2 +; SROA-CPS-NEXT: ret <3 x i32> [[VAL_2]] +; +; +; SROA-CPS-LABEL: define <3 x float> @_cont_ObjectRayOrigin3( +; SROA-CPS-SAME: ptr nocapture readnone [[DATA:%.*]], ptr [[HITDATA:%.*]]) { +; SROA-CPS-NEXT: [[RESPTR_1:%.*]] = getelementptr [[STRUCT_HITDATA:%.*]], ptr [[HITDATA]], i32 0, i32 0, i32 0 +; SROA-CPS-NEXT: [[RES_1:%.*]] = load float, ptr [[RESPTR_1]], align 4 +; SROA-CPS-NEXT: [[RESPTR_2:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[HITDATA]], i32 0, i32 0, i32 1 +; SROA-CPS-NEXT: [[RES_2:%.*]] = load float, ptr [[RESPTR_2]], align 4 +; SROA-CPS-NEXT: [[RESPTR_3:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[HITDATA]], i32 0, i32 0, i32 2 +; SROA-CPS-NEXT: [[RES_3:%.*]] = load float, ptr [[RESPTR_3]], align 4 +; SROA-CPS-NEXT: [[VAL_0:%.*]] = insertelement <3 x float> undef, float 
[[RES_1]], i32 0 +; SROA-CPS-NEXT: [[VAL_1:%.*]] = insertelement <3 x float> [[VAL_0]], float [[RES_2]], i32 1 +; SROA-CPS-NEXT: [[VAL_2:%.*]] = insertelement <3 x float> [[VAL_1]], float [[RES_3]], i32 2 +; SROA-CPS-NEXT: ret <3 x float> [[VAL_2]] +; +; +; SROA-CPS-LABEL: define <3 x float> @_cont_ObjectRayDirection3( +; SROA-CPS-SAME: ptr nocapture readnone [[DATA:%.*]], ptr [[HITDATA:%.*]]) { +; SROA-CPS-NEXT: [[RESPTR_1:%.*]] = getelementptr [[STRUCT_HITDATA:%.*]], ptr [[HITDATA]], i32 0, i32 1, i32 0 +; SROA-CPS-NEXT: [[RES_1:%.*]] = load float, ptr [[RESPTR_1]], align 4 +; SROA-CPS-NEXT: [[RESPTR_2:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[HITDATA]], i32 0, i32 1, i32 1 +; SROA-CPS-NEXT: [[RES_2:%.*]] = load float, ptr [[RESPTR_2]], align 4 +; SROA-CPS-NEXT: [[RESPTR_3:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[HITDATA]], i32 0, i32 1, i32 2 +; SROA-CPS-NEXT: [[RES_3:%.*]] = load float, ptr [[RESPTR_3]], align 4 +; SROA-CPS-NEXT: [[VAL_0:%.*]] = insertelement <3 x float> undef, float [[RES_1]], i32 0 +; SROA-CPS-NEXT: [[VAL_1:%.*]] = insertelement <3 x float> [[VAL_0]], float [[RES_2]], i32 1 +; SROA-CPS-NEXT: [[VAL_2:%.*]] = insertelement <3 x float> [[VAL_1]], float [[RES_3]], i32 2 +; SROA-CPS-NEXT: ret <3 x float> [[VAL_2]] +; +; +; SROA-CPS-LABEL: define float @_cont_RayTCurrent( +; SROA-CPS-SAME: ptr nocapture readnone [[DATA:%.*]], ptr [[HITDATA:%.*]]) { +; SROA-CPS-NEXT: [[RESPTR:%.*]] = getelementptr [[STRUCT_HITDATA:%.*]], ptr [[HITDATA]], i32 0, i32 2 +; SROA-CPS-NEXT: [[RES:%.*]] = load float, ptr [[RESPTR]], align 4 +; SROA-CPS-NEXT: ret float [[RES]] +; +; +; SROA-CPS-LABEL: define void @MyRayGen( +; SROA-CPS-SAME: {} [[TMP0:%.*]], i32 [[CSPINIT:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]]) #[[ATTR2:[0-9]+]] !lgc.rt.shaderstage [[META22:![0-9]+]] !lgc.cps [[META22]] !continuation [[META35:![0-9]+]] { +; SROA-CPS-NEXT: AllocaSpillBB: +; SROA-CPS-NEXT: [[SYSTEM_DATA:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA:%.*]] @_cont_SetupRayGen() +; 
SROA-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[SYSTEM_DATA]], 0 +; SROA-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) +; SROA-CPS-NEXT: [[TMP3:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 +; SROA-CPS-NEXT: [[TMP4:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 +; SROA-CPS-NEXT: [[TMP5:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP3]]) +; SROA-CPS-NEXT: [[TMP6:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP5]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; SROA-CPS-NEXT: [[TMP7:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP6]]) +; SROA-CPS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT]], 0 +; SROA-CPS-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]], 0 +; SROA-CPS-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 +; SROA-CPS-NEXT: [[TMP8:%.*]] = call i32 (...) 
@lgc.cps.as.continuation.reference(ptr @MyRayGen.resume.0) +; SROA-CPS-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +; SROA-CPS-NEXT: [[TRAV_DATA2_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], i64 [[TMP9]], 5 +; SROA-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 0 +; SROA-CPS-NEXT: [[TMP10:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; SROA-CPS-NEXT: store i32 [[TMP10]], ptr addrspace(20) @REGISTERS, align 4 +; SROA-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 1 +; SROA-CPS-NEXT: [[TMP11:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; SROA-CPS-NEXT: store i32 [[TMP11]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 +; SROA-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 2 +; SROA-CPS-NEXT: [[TMP12:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 +; SROA-CPS-NEXT: store i32 [[TMP12]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 +; SROA-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 3 +; SROA-CPS-NEXT: [[TMP13:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 +; SROA-CPS-NEXT: store i32 [[TMP13]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 +; SROA-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 4, i32 4, {} poison, i32 [[TMP8]], i32 5), !continuation.returnedRegistercount !33, !continuation.registercount [[META33:![0-9]+]] +; SROA-CPS-NEXT: unreachable +; +; +; SROA-CPS-LABEL: define dso_local void @MyRayGen.resume.0( +; SROA-CPS-SAME: {} [[TMP0:%.*]], i32 [[CSPINIT:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP3:%.*]]) !lgc.rt.shaderstage [[META22]] !lgc.cps [[META22]] !continuation [[META35]] { +; SROA-CPS-NEXT: entryresume.0: +; SROA-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP3]], 0 +; SROA-CPS-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; SROA-CPS-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float +; SROA-CPS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i32 0 +; SROA-CPS-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 +; SROA-CPS-NEXT: [[TMP7:%.*]] = bitcast i32 [[TMP6]] to float +; SROA-CPS-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP7]], i32 1 +; SROA-CPS-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 +; SROA-CPS-NEXT: [[TMP9:%.*]] = bitcast i32 [[TMP8]] to float +; SROA-CPS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP9]], i32 2 +; SROA-CPS-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, 
i32 0, i64 9) to ptr addrspace(20)), align 4 +; SROA-CPS-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP10]] to float +; SROA-CPS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP11]], i32 3 +; SROA-CPS-NEXT: [[DOTFCA_0_EXTRACT6:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP3]], 0 +; SROA-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) +; SROA-CPS-NEXT: [[TMP12:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 +; SROA-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT2:%.*]] = extractelement <3 x i32> [[DOTFCA_0_EXTRACT]], i32 0 +; SROA-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT4:%.*]] = extractelement <3 x i32> [[DOTFCA_0_EXTRACT]], i32 1 +; SROA-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT6:%.*]] = extractelement <3 x i32> [[DOTFCA_0_EXTRACT]], i32 2 +; SROA-CPS-NEXT: [[VAL_0_I6:%.*]] = insertelement <3 x i32> undef, i32 [[DOTSROA_0_0_VEC_EXTRACT2]], i32 0 +; SROA-CPS-NEXT: [[VAL_1_I7:%.*]] = insertelement <3 x i32> [[VAL_0_I6]], i32 [[DOTSROA_0_4_VEC_EXTRACT4]], i32 1 +; SROA-CPS-NEXT: [[VAL_2_I8:%.*]] = insertelement <3 x i32> [[VAL_1_I7]], i32 [[DOTSROA_0_8_VEC_EXTRACT6]], i32 2 +; SROA-CPS-NEXT: [[EXTRACT:%.*]] = extractelement <3 x i32> [[VAL_2_I8]], i8 0 +; SROA-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <3 x i32> [[DOTFCA_0_EXTRACT]], i32 0 +; SROA-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <3 x i32> [[DOTFCA_0_EXTRACT]], i32 1 +; SROA-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <3 x i32> [[DOTFCA_0_EXTRACT]], i32 2 +; SROA-CPS-NEXT: [[VAL_0_I:%.*]] = insertelement <3 x i32> undef, i32 [[DOTSROA_0_0_VEC_EXTRACT]], i32 0 +; SROA-CPS-NEXT: [[VAL_1_I:%.*]] = insertelement <3 x i32> [[VAL_0_I]], i32 [[DOTSROA_0_4_VEC_EXTRACT]], i32 1 +; SROA-CPS-NEXT: [[VAL_2_I:%.*]] = insertelement <3 x i32> [[VAL_1_I]], i32 [[DOTSROA_0_8_VEC_EXTRACT]], i32 2 +; SROA-CPS-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x i32> [[VAL_2_I]], i8 1 +; 
SROA-CPS-NEXT: [[TMP13:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP12]]) +; SROA-CPS-NEXT: [[TMP14:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP13]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 4098, i32 1033 }) +; SROA-CPS-NEXT: [[TMP15:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 0 +; SROA-CPS-NEXT: [[TMP16:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 1 +; SROA-CPS-NEXT: [[TMP17:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 2 +; SROA-CPS-NEXT: [[TMP18:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 3 +; SROA-CPS-NEXT: call void @dx.op.textureStore.f32(i32 67, [[DX_TYPES_HANDLE]] [[TMP14]], i32 [[EXTRACT]], i32 [[EXTRACT1]], i32 undef, float [[TMP15]], float [[TMP16]], float [[TMP17]], float [[TMP18]], i8 15) +; SROA-CPS-NEXT: ret void +; +; +; SROA-CPS-LABEL: define void @MyClosestHitShader( +; SROA-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[CSPINIT:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META36:![0-9]+]] !lgc.cps [[META37:![0-9]+]] !continuation [[META38:![0-9]+]] { +; SROA-CPS-NEXT: AllocaSpillBB: +; SROA-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 +; SROA-CPS-NEXT: [[DOTFCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 0, 0 +; SROA-CPS-NEXT: [[DOTFCA_0_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; SROA-CPS-NEXT: store <3 x i32> [[DOTFCA_0_0_EXTRACT]], ptr [[DOTFCA_0_0_GEP]], align 4 +; SROA-CPS-NEXT: [[TMP1:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] @_cont_GetTriangleHitAttributes(ptr [[SYSTEM_DATA_ALLOCA]]) +; SROA-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP1]], 0 +; SROA-CPS-NEXT: 
[[DOTSROA_03_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 +; SROA-CPS-NEXT: [[TMP2:%.*]] = bitcast float [[DOTSROA_03_0_VEC_EXTRACT]] to i32 +; SROA-CPS-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float +; SROA-CPS-NEXT: [[HITATTRS_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP3]], i32 0 +; SROA-CPS-NEXT: [[DOTSROA_03_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 +; SROA-CPS-NEXT: [[TMP4:%.*]] = bitcast float [[DOTSROA_03_4_VEC_EXTRACT]] to i32 +; SROA-CPS-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float +; SROA-CPS-NEXT: [[HITATTRS_SROA_0_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[HITATTRS_SROA_0_0_VEC_INSERT]], float [[TMP5]], i32 1 +; SROA-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) +; SROA-CPS-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[HITATTRS_SROA_0_4_VEC_INSERT]], i32 0 +; SROA-CPS-NEXT: [[TMP7:%.*]] = fsub fast float 1.000000e+00, [[TMP6]] +; SROA-CPS-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[HITATTRS_SROA_0_4_VEC_INSERT]], i32 1 +; SROA-CPS-NEXT: [[TMP9:%.*]] = fsub fast float [[TMP7]], [[TMP8]] +; SROA-CPS-NEXT: [[TMP10:%.*]] = insertelement <4 x float> undef, float [[TMP9]], i64 0 +; SROA-CPS-NEXT: [[TMP11:%.*]] = insertelement <4 x float> [[TMP10]], float [[TMP6]], i64 1 +; SROA-CPS-NEXT: [[TMP12:%.*]] = insertelement <4 x float> [[TMP11]], float [[TMP8]], i64 2 +; SROA-CPS-NEXT: [[TMP13:%.*]] = insertelement <4 x float> [[TMP12]], float 1.000000e+00, i64 3 +; SROA-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP13]], i32 0 +; SROA-CPS-NEXT: [[TMP14:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; SROA-CPS-NEXT: store i32 [[TMP14]], ptr addrspace(20) @REGISTERS, align 4 +; SROA-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP13]], i32 1 +; SROA-CPS-NEXT: [[TMP15:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; SROA-CPS-NEXT: store i32 
[[TMP15]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 +; SROA-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP13]], i32 2 +; SROA-CPS-NEXT: [[TMP16:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 +; SROA-CPS-NEXT: store i32 [[TMP16]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 +; SROA-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP13]], i32 3 +; SROA-CPS-NEXT: [[TMP17:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 +; SROA-CPS-NEXT: store i32 [[TMP17]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 +; SROA-CPS-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; SROA-CPS-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP18]], i32 0, i32 0 +; SROA-CPS-NEXT: [[DOTFCA_0_LOAD:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_GEP]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_LOAD]], 0 +; SROA-CPS-NEXT: [[TMP19:%.*]] = add i32 [[CSPINIT]], 0 +; SROA-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 6, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]) +; SROA-CPS-NEXT: unreachable +; +; +; SROA-CPS-LABEL: define void @MyAnyHitShader( +; SROA-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[CSPINIT:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[TMP1:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META39:![0-9]+]] !lgc.cps [[META36]] !continuation [[META40:![0-9]+]] { +; SROA-CPS-NEXT: AllocaSpillBB: +; SROA-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITTRAVERSALDATA]], align 8 +; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 0, 0, 0 +; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 +; SROA-CPS-NEXT: store <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]], ptr [[DOTFCA_0_0_0_0_GEP]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 0 +; SROA-CPS-NEXT: [[DOTFCA_0_1_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; SROA-CPS-NEXT: store <3 x float> [[DOTFCA_0_1_0_EXTRACT]], ptr [[DOTFCA_0_1_0_GEP]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 1 +; SROA-CPS-NEXT: [[DOTFCA_0_1_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 +; SROA-CPS-NEXT: store <3 x float> [[DOTFCA_0_1_1_EXTRACT]], ptr [[DOTFCA_0_1_1_GEP]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 2 +; SROA-CPS-NEXT: [[DOTFCA_0_1_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr 
[[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 +; SROA-CPS-NEXT: store float [[DOTFCA_0_1_2_EXTRACT]], ptr [[DOTFCA_0_1_2_GEP]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 3 +; SROA-CPS-NEXT: [[DOTFCA_0_1_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 +; SROA-CPS-NEXT: store i32 [[DOTFCA_0_1_3_EXTRACT]], ptr [[DOTFCA_0_1_3_GEP]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 2 +; SROA-CPS-NEXT: [[DOTFCA_0_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 +; SROA-CPS-NEXT: store <3 x float> [[DOTFCA_0_2_EXTRACT]], ptr [[DOTFCA_0_2_GEP]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 3 +; SROA-CPS-NEXT: [[DOTFCA_0_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 +; SROA-CPS-NEXT: store <3 x float> [[DOTFCA_0_3_EXTRACT]], ptr [[DOTFCA_0_3_GEP]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_4_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 4 +; SROA-CPS-NEXT: [[DOTFCA_0_4_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 +; SROA-CPS-NEXT: store float [[DOTFCA_0_4_EXTRACT]], ptr [[DOTFCA_0_4_GEP]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_5_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 5 +; SROA-CPS-NEXT: [[DOTFCA_0_5_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 +; SROA-CPS-NEXT: store i64 [[DOTFCA_0_5_EXTRACT]], ptr [[DOTFCA_0_5_GEP]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 0 +; SROA-CPS-NEXT: 
[[DOTFCA_1_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; SROA-CPS-NEXT: store <3 x float> [[DOTFCA_1_0_EXTRACT]], ptr [[DOTFCA_1_0_GEP]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 1 +; SROA-CPS-NEXT: [[DOTFCA_1_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; SROA-CPS-NEXT: store <3 x float> [[DOTFCA_1_1_EXTRACT]], ptr [[DOTFCA_1_1_GEP]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 2 +; SROA-CPS-NEXT: [[DOTFCA_1_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 +; SROA-CPS-NEXT: store float [[DOTFCA_1_2_EXTRACT]], ptr [[DOTFCA_1_2_GEP]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 3 +; SROA-CPS-NEXT: [[DOTFCA_1_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 +; SROA-CPS-NEXT: store i32 [[DOTFCA_1_3_EXTRACT]], ptr [[DOTFCA_1_3_GEP]], align 4 +; SROA-CPS-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; SROA-CPS-NEXT: [[TMP3:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] @_cont_GetTriangleHitAttributes(ptr [[TMP2]]) +; SROA-CPS-NEXT: [[DOTFCA_0_EXTRACT233:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP3]], 0 +; SROA-CPS-NEXT: [[DOTSROA_0235_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT233]], i32 0 +; SROA-CPS-NEXT: [[TMP4:%.*]] = bitcast float [[DOTSROA_0235_0_VEC_EXTRACT]] to i32 +; SROA-CPS-NEXT: [[DOTSROA_0235_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT233]], i32 1 +; SROA-CPS-NEXT: [[TMP5:%.*]] = bitcast float 
[[DOTSROA_0235_4_VEC_EXTRACT]] to i32 +; SROA-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP1]], 0 +; SROA-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) +; SROA-CPS-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; SROA-CPS-NEXT: [[RES_I1_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA:%.*]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; SROA-CPS-NEXT: [[RES_I1_FCA_0_LOAD:%.*]] = load <3 x float>, ptr [[RES_I1_FCA_0_GEP]], align 4 +; SROA-CPS-NEXT: [[RES_I1_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] poison, <3 x float> [[RES_I1_FCA_0_LOAD]], 0 +; SROA-CPS-NEXT: [[RES_I1_FCA_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1 +; SROA-CPS-NEXT: [[RES_I1_FCA_1_LOAD:%.*]] = load <3 x float>, ptr [[RES_I1_FCA_1_GEP]], align 4 +; SROA-CPS-NEXT: [[RES_I1_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_0_INSERT]], <3 x float> [[RES_I1_FCA_1_LOAD]], 1 +; SROA-CPS-NEXT: [[RES_I1_FCA_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 2 +; SROA-CPS-NEXT: [[RES_I1_FCA_2_LOAD:%.*]] = load float, ptr [[RES_I1_FCA_2_GEP]], align 4 +; SROA-CPS-NEXT: [[RES_I1_FCA_2_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_1_INSERT]], float [[RES_I1_FCA_2_LOAD]], 2 +; SROA-CPS-NEXT: [[RES_I1_FCA_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 3 +; SROA-CPS-NEXT: [[RES_I1_FCA_3_LOAD:%.*]] = load i32, ptr [[RES_I1_FCA_3_GEP]], align 4 +; SROA-CPS-NEXT: [[RES_I1_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_2_INSERT]], i32 [[RES_I1_FCA_3_LOAD]], 3 +; SROA-CPS-NEXT: [[RES_I1_FCA_3_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_3_INSERT]], 0 +; SROA-CPS-NEXT: [[RES_I1_FCA_3_INSERT_FCA_1_EXTRACT:%.*]] = 
extractvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_3_INSERT]], 1 +; SROA-CPS-NEXT: [[RES_I1_FCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_3_INSERT]], 2 +; SROA-CPS-NEXT: [[RES_I1_FCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_3_INSERT]], 3 +; SROA-CPS-NEXT: [[DOTSROA_0256_0_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I1_FCA_3_INSERT_FCA_0_EXTRACT]], i32 0 +; SROA-CPS-NEXT: [[DOTSROA_0256_4_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I1_FCA_3_INSERT_FCA_0_EXTRACT]], i32 1 +; SROA-CPS-NEXT: [[DOTSROA_0256_8_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I1_FCA_3_INSERT_FCA_0_EXTRACT]], i32 2 +; SROA-CPS-NEXT: [[VAL_0_I7:%.*]] = insertelement <3 x float> undef, float [[DOTSROA_0256_0_VEC_EXTRACT]], i32 0 +; SROA-CPS-NEXT: [[VAL_1_I8:%.*]] = insertelement <3 x float> [[VAL_0_I7]], float [[DOTSROA_0256_4_VEC_EXTRACT]], i32 1 +; SROA-CPS-NEXT: [[VAL_2_I9:%.*]] = insertelement <3 x float> [[VAL_1_I8]], float [[DOTSROA_0256_8_VEC_EXTRACT]], i32 2 +; SROA-CPS-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x float> [[VAL_2_I9]], i8 0 +; SROA-CPS-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; SROA-CPS-NEXT: [[RES_I_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; SROA-CPS-NEXT: [[RES_I_FCA_0_LOAD:%.*]] = load <3 x float>, ptr [[RES_I_FCA_0_GEP]], align 4 +; SROA-CPS-NEXT: [[RES_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] poison, <3 x float> [[RES_I_FCA_0_LOAD]], 0 +; SROA-CPS-NEXT: [[RES_I_FCA_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1 +; SROA-CPS-NEXT: [[RES_I_FCA_1_LOAD:%.*]] = load <3 x float>, ptr [[RES_I_FCA_1_GEP]], align 4 +; SROA-CPS-NEXT: [[RES_I_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_0_INSERT]], <3 x float> [[RES_I_FCA_1_LOAD]], 1 +; SROA-CPS-NEXT: 
[[RES_I_FCA_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 2 +; SROA-CPS-NEXT: [[RES_I_FCA_2_LOAD:%.*]] = load float, ptr [[RES_I_FCA_2_GEP]], align 4 +; SROA-CPS-NEXT: [[RES_I_FCA_2_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_1_INSERT]], float [[RES_I_FCA_2_LOAD]], 2 +; SROA-CPS-NEXT: [[RES_I_FCA_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 3 +; SROA-CPS-NEXT: [[RES_I_FCA_3_LOAD:%.*]] = load i32, ptr [[RES_I_FCA_3_GEP]], align 4 +; SROA-CPS-NEXT: [[RES_I_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_2_INSERT]], i32 [[RES_I_FCA_3_LOAD]], 3 +; SROA-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 0 +; SROA-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 1 +; SROA-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 2 +; SROA-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 3 +; SROA-CPS-NEXT: [[DOTSROA_1_12_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I_FCA_3_INSERT_FCA_1_EXTRACT]], i32 0 +; SROA-CPS-NEXT: [[DOTSROA_1_16_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I_FCA_3_INSERT_FCA_1_EXTRACT]], i32 1 +; SROA-CPS-NEXT: [[DOTSROA_1_20_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I_FCA_3_INSERT_FCA_1_EXTRACT]], i32 2 +; SROA-CPS-NEXT: [[VAL_0_I:%.*]] = insertelement <3 x float> undef, float [[DOTSROA_1_12_VEC_EXTRACT]], i32 0 +; SROA-CPS-NEXT: [[VAL_1_I:%.*]] = insertelement <3 x float> [[VAL_0_I]], float [[DOTSROA_1_16_VEC_EXTRACT]], i32 1 +; SROA-CPS-NEXT: [[VAL_2_I:%.*]] = insertelement <3 x float> [[VAL_1_I]], float [[DOTSROA_1_20_VEC_EXTRACT]], i32 2 +; SROA-CPS-NEXT: [[EXTRACT:%.*]] = extractelement <3 x float> [[VAL_2_I]], i8 0 +; SROA-CPS-NEXT: [[TMP8:%.*]] = getelementptr 
inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; SROA-CPS-NEXT: [[RES_I10_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; SROA-CPS-NEXT: [[RES_I10_FCA_0_LOAD:%.*]] = load <3 x float>, ptr [[RES_I10_FCA_0_GEP]], align 4 +; SROA-CPS-NEXT: [[RES_I10_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] poison, <3 x float> [[RES_I10_FCA_0_LOAD]], 0 +; SROA-CPS-NEXT: [[RES_I10_FCA_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1 +; SROA-CPS-NEXT: [[RES_I10_FCA_1_LOAD:%.*]] = load <3 x float>, ptr [[RES_I10_FCA_1_GEP]], align 4 +; SROA-CPS-NEXT: [[RES_I10_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I10_FCA_0_INSERT]], <3 x float> [[RES_I10_FCA_1_LOAD]], 1 +; SROA-CPS-NEXT: [[RES_I10_FCA_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 2 +; SROA-CPS-NEXT: [[RES_I10_FCA_2_LOAD:%.*]] = load float, ptr [[RES_I10_FCA_2_GEP]], align 4 +; SROA-CPS-NEXT: [[RES_I10_FCA_2_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I10_FCA_1_INSERT]], float [[RES_I10_FCA_2_LOAD]], 2 +; SROA-CPS-NEXT: [[RES_I10_FCA_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 3 +; SROA-CPS-NEXT: [[RES_I10_FCA_3_LOAD:%.*]] = load i32, ptr [[RES_I10_FCA_3_GEP]], align 4 +; SROA-CPS-NEXT: [[RES_I10_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I10_FCA_2_INSERT]], i32 [[RES_I10_FCA_3_LOAD]], 3 +; SROA-CPS-NEXT: [[RES_I10_FCA_3_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I10_FCA_3_INSERT]], 0 +; SROA-CPS-NEXT: [[RES_I10_FCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I10_FCA_3_INSERT]], 1 +; SROA-CPS-NEXT: [[RES_I10_FCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I10_FCA_3_INSERT]], 2 +; SROA-CPS-NEXT: [[RES_I10_FCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue 
[[STRUCT_HITDATA]] [[RES_I10_FCA_3_INSERT]], 3 +; SROA-CPS-NEXT: [[TMP9:%.*]] = fmul fast float [[RES_I10_FCA_3_INSERT_FCA_2_EXTRACT]], [[EXTRACT]] +; SROA-CPS-NEXT: [[TMP10:%.*]] = fadd fast float [[TMP9]], [[EXTRACT1]] +; SROA-CPS-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP10]], 0.000000e+00 +; SROA-CPS-NEXT: [[TMP12:%.*]] = fcmp fast ogt float [[TMP10]], 1.000000e+00 +; SROA-CPS-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP10]], -1.000000e+00 +; SROA-CPS-NEXT: br i1 [[TMP11]], label [[TMP14:%.*]], label [[TMP39:%.*]] +; SROA-CPS: 14: +; SROA-CPS-NEXT: br i1 [[TMP12]], label [[TMP15:%.*]], label [[TMP27:%.*]] +; SROA-CPS: 15: +; SROA-CPS-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; SROA-CPS-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP16]]) +; SROA-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> undef, i32 0 +; SROA-CPS-NEXT: [[TMP17:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; SROA-CPS-NEXT: store i32 [[TMP17]], ptr addrspace(20) @REGISTERS, align 4 +; SROA-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> undef, i32 1 +; SROA-CPS-NEXT: [[TMP18:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; SROA-CPS-NEXT: store i32 [[TMP18]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 +; SROA-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> undef, i32 2 +; SROA-CPS-NEXT: [[TMP19:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 +; SROA-CPS-NEXT: store i32 [[TMP19]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, 
i64 8) to ptr addrspace(20)), align 4 +; SROA-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> undef, i32 3 +; SROA-CPS-NEXT: [[TMP20:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 +; SROA-CPS-NEXT: store i32 [[TMP20]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 +; SROA-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 +; SROA-CPS-NEXT: [[TMP21:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT]] to i32 +; SROA-CPS-NEXT: [[TMP22:%.*]] = bitcast i32 [[TMP21]] to float +; SROA-CPS-NEXT: [[DOTSROA_0237_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP22]], i32 0 +; SROA-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 +; SROA-CPS-NEXT: [[TMP23:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT]] to i32 +; SROA-CPS-NEXT: [[TMP24:%.*]] = bitcast i32 [[TMP23]] to float +; SROA-CPS-NEXT: [[DOTSROA_0237_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0237_0_VEC_INSERT]], float [[TMP24]], i32 1 +; SROA-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0237_4_VEC_INSERT]], 0 +; SROA-CPS-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; SROA-CPS-NEXT: [[DOTFCA_0_EXTRACT55:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT]], 0 +; SROA-CPS-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP25]], i32 0, i32 0 +; SROA-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT55]], ptr [[DOTFCA_0_GEP]], align 4 +; SROA-CPS-NEXT: 
[[DOTFCA_0_0_0_0_GEP56:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 +; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_LOAD:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP56]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD]], 0, 0, 0, 0 +; SROA-CPS-NEXT: [[DOTFCA_0_1_0_GEP57:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; SROA-CPS-NEXT: [[DOTFCA_0_1_0_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP57]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_LOAD]], 0, 1, 0 +; SROA-CPS-NEXT: [[DOTFCA_0_1_1_GEP58:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 +; SROA-CPS-NEXT: [[DOTFCA_0_1_1_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP58]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_LOAD]], 0, 1, 1 +; SROA-CPS-NEXT: [[DOTFCA_0_1_2_GEP59:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 +; SROA-CPS-NEXT: [[DOTFCA_0_1_2_LOAD:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP59]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_LOAD]], 0, 1, 2 +; SROA-CPS-NEXT: [[DOTFCA_0_1_3_GEP60:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 +; SROA-CPS-NEXT: [[DOTFCA_0_1_3_LOAD:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP60]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], 
i32 [[DOTFCA_0_1_3_LOAD]], 0, 1, 3 +; SROA-CPS-NEXT: [[DOTFCA_0_2_GEP61:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 +; SROA-CPS-NEXT: [[DOTFCA_0_2_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP61]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_LOAD]], 0, 2 +; SROA-CPS-NEXT: [[DOTFCA_0_3_GEP62:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 +; SROA-CPS-NEXT: [[DOTFCA_0_3_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP62]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_LOAD]], 0, 3 +; SROA-CPS-NEXT: [[DOTFCA_0_4_GEP63:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 +; SROA-CPS-NEXT: [[DOTFCA_0_4_LOAD:%.*]] = load float, ptr [[DOTFCA_0_4_GEP63]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], float [[DOTFCA_0_4_LOAD]], 0, 4 +; SROA-CPS-NEXT: [[DOTFCA_0_5_GEP64:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 +; SROA-CPS-NEXT: [[DOTFCA_0_5_LOAD:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP64]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 [[DOTFCA_0_5_LOAD]], 0, 5 +; SROA-CPS-NEXT: [[DOTFCA_1_0_GEP65:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; SROA-CPS-NEXT: [[DOTFCA_1_0_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP65]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_LOAD]], 1, 0 +; SROA-CPS-NEXT: 
[[DOTFCA_1_1_GEP66:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; SROA-CPS-NEXT: [[DOTFCA_1_1_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_1_1_GEP66]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_LOAD]], 1, 1 +; SROA-CPS-NEXT: [[DOTFCA_1_2_GEP67:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 +; SROA-CPS-NEXT: [[DOTFCA_1_2_LOAD:%.*]] = load float, ptr [[DOTFCA_1_2_GEP67]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[DOTFCA_1_2_LOAD]], 1, 2 +; SROA-CPS-NEXT: [[DOTFCA_1_3_GEP68:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 +; SROA-CPS-NEXT: [[DOTFCA_1_3_LOAD:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP68]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[DOTFCA_1_3_LOAD]], 1, 3 +; SROA-CPS-NEXT: [[TMP26:%.*]] = add i32 [[CSPINIT]], 0 +; SROA-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 18, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]]) +; SROA-CPS-NEXT: unreachable +; SROA-CPS: 27: +; SROA-CPS-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; SROA-CPS-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP28]]) +; SROA-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT24:%.*]] = extractelement <4 x float> undef, i32 0 +; SROA-CPS-NEXT: [[TMP29:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT24]] to i32 +; SROA-CPS-NEXT: store i32 [[TMP29]], ptr addrspace(20) @REGISTERS, align 4 +; SROA-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT32:%.*]] = extractelement <4 x float> undef, i32 1 +; SROA-CPS-NEXT: [[TMP30:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT32]] to i32 +; SROA-CPS-NEXT: store i32 [[TMP30]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 +; SROA-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT40:%.*]] = extractelement <4 x float> undef, i32 2 +; SROA-CPS-NEXT: [[TMP31:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT40]] to i32 +; SROA-CPS-NEXT: store i32 [[TMP31]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 +; SROA-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT48:%.*]] = extractelement <4 x float> undef, i32 3 +; SROA-CPS-NEXT: [[TMP32:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT48]] to i32 +; SROA-CPS-NEXT: store i32 [[TMP32]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, 
i32 0, i64 9) to ptr addrspace(20)), align 4 +; SROA-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT13:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 +; SROA-CPS-NEXT: [[TMP33:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT13]] to i32 +; SROA-CPS-NEXT: [[TMP34:%.*]] = bitcast i32 [[TMP33]] to float +; SROA-CPS-NEXT: [[DOTSROA_0241_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP34]], i32 0 +; SROA-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT17:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 +; SROA-CPS-NEXT: [[TMP35:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT17]] to i32 +; SROA-CPS-NEXT: [[TMP36:%.*]] = bitcast i32 [[TMP35]] to float +; SROA-CPS-NEXT: [[DOTSROA_0241_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0241_0_VEC_INSERT]], float [[TMP36]], i32 1 +; SROA-CPS-NEXT: [[DOTFCA_0_INSERT240:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0241_4_VEC_INSERT]], 0 +; SROA-CPS-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; SROA-CPS-NEXT: [[DOTFCA_0_EXTRACT69:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT240]], 0 +; SROA-CPS-NEXT: [[DOTFCA_0_GEP70:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP37]], i32 0, i32 0 +; SROA-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT69]], ptr [[DOTFCA_0_GEP70]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP71:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 +; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_LOAD72:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP71]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT73:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD72]], 0, 0, 0, 0 +; SROA-CPS-NEXT: [[DOTFCA_0_1_0_GEP74:%.*]] = 
getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; SROA-CPS-NEXT: [[DOTFCA_0_1_0_LOAD75:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP74]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_1_0_INSERT76:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT73]], <3 x float> [[DOTFCA_0_1_0_LOAD75]], 0, 1, 0 +; SROA-CPS-NEXT: [[DOTFCA_0_1_1_GEP77:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 +; SROA-CPS-NEXT: [[DOTFCA_0_1_1_LOAD78:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP77]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_1_1_INSERT79:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT76]], <3 x float> [[DOTFCA_0_1_1_LOAD78]], 0, 1, 1 +; SROA-CPS-NEXT: [[DOTFCA_0_1_2_GEP80:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 +; SROA-CPS-NEXT: [[DOTFCA_0_1_2_LOAD81:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP80]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_1_2_INSERT82:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT79]], float [[DOTFCA_0_1_2_LOAD81]], 0, 1, 2 +; SROA-CPS-NEXT: [[DOTFCA_0_1_3_GEP83:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 +; SROA-CPS-NEXT: [[DOTFCA_0_1_3_LOAD84:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP83]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_1_3_INSERT85:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT82]], i32 [[DOTFCA_0_1_3_LOAD84]], 0, 1, 3 +; SROA-CPS-NEXT: [[DOTFCA_0_2_GEP86:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 +; SROA-CPS-NEXT: [[DOTFCA_0_2_LOAD87:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP86]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_2_INSERT88:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT85]], <3 x float> 
[[DOTFCA_0_2_LOAD87]], 0, 2 +; SROA-CPS-NEXT: [[DOTFCA_0_3_GEP89:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 +; SROA-CPS-NEXT: [[DOTFCA_0_3_LOAD90:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP89]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_3_INSERT91:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT88]], <3 x float> [[DOTFCA_0_3_LOAD90]], 0, 3 +; SROA-CPS-NEXT: [[DOTFCA_0_4_GEP92:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 +; SROA-CPS-NEXT: [[DOTFCA_0_4_LOAD93:%.*]] = load float, ptr [[DOTFCA_0_4_GEP92]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_4_INSERT94:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT91]], float [[DOTFCA_0_4_LOAD93]], 0, 4 +; SROA-CPS-NEXT: [[DOTFCA_0_5_GEP95:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 +; SROA-CPS-NEXT: [[DOTFCA_0_5_LOAD96:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP95]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_5_INSERT97:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT94]], i64 [[DOTFCA_0_5_LOAD96]], 0, 5 +; SROA-CPS-NEXT: [[DOTFCA_1_0_GEP98:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; SROA-CPS-NEXT: [[DOTFCA_1_0_LOAD99:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP98]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_1_0_INSERT100:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT97]], <3 x float> [[DOTFCA_1_0_LOAD99]], 1, 0 +; SROA-CPS-NEXT: [[DOTFCA_1_1_GEP101:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; SROA-CPS-NEXT: [[DOTFCA_1_1_LOAD102:%.*]] = load <3 x float>, ptr [[DOTFCA_1_1_GEP101]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_1_1_INSERT103:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT100]], <3 x float> 
[[DOTFCA_1_1_LOAD102]], 1, 1 +; SROA-CPS-NEXT: [[DOTFCA_1_2_GEP104:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 +; SROA-CPS-NEXT: [[DOTFCA_1_2_LOAD105:%.*]] = load float, ptr [[DOTFCA_1_2_GEP104]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_1_2_INSERT106:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT103]], float [[DOTFCA_1_2_LOAD105]], 1, 2 +; SROA-CPS-NEXT: [[DOTFCA_1_3_GEP107:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 +; SROA-CPS-NEXT: [[DOTFCA_1_3_LOAD108:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP107]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_1_3_INSERT109:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT106]], i32 [[DOTFCA_1_3_LOAD108]], 1, 3 +; SROA-CPS-NEXT: [[TMP38:%.*]] = add i32 [[CSPINIT]], 0 +; SROA-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 18, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT109]]) +; SROA-CPS-NEXT: unreachable +; SROA-CPS: 39: +; SROA-CPS-NEXT: br i1 [[TMP13]], label [[TMP40:%.*]], label [[TMP61:%.*]] +; SROA-CPS: 40: +; SROA-CPS-NEXT: br i1 [[TMP12]], label [[TMP41:%.*]], label [[TMP51:%.*]] +; SROA-CPS: 41: +; SROA-CPS-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; SROA-CPS-NEXT: call void @_cont_IgnoreHit(ptr [[TMP42]]) +; SROA-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT26:%.*]] = extractelement <4 x float> undef, i32 0 +; SROA-CPS-NEXT: [[TMP43:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT26]] to i32 +; SROA-CPS-NEXT: store i32 [[TMP43]], ptr addrspace(20) @REGISTERS, align 4 +; SROA-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT34:%.*]] = extractelement <4 x float> undef, i32 1 +; SROA-CPS-NEXT: [[TMP44:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT34]] to i32 +; SROA-CPS-NEXT: store i32 [[TMP44]], ptr addrspace(20) addrspacecast (ptr 
getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 +; SROA-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT42:%.*]] = extractelement <4 x float> undef, i32 2 +; SROA-CPS-NEXT: [[TMP45:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT42]] to i32 +; SROA-CPS-NEXT: store i32 [[TMP45]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 +; SROA-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT50:%.*]] = extractelement <4 x float> undef, i32 3 +; SROA-CPS-NEXT: [[TMP46:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT50]] to i32 +; SROA-CPS-NEXT: store i32 [[TMP46]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 +; SROA-CPS-NEXT: [[TMP47:%.*]] = bitcast i32 [[TMP4]] to float +; SROA-CPS-NEXT: [[DOTSROA_0245_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP47]], i32 0 +; SROA-CPS-NEXT: [[TMP48:%.*]] = bitcast i32 [[TMP5]] to float +; SROA-CPS-NEXT: [[DOTSROA_0245_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0245_0_VEC_INSERT]], float [[TMP48]], i32 1 +; SROA-CPS-NEXT: [[DOTFCA_0_INSERT244:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0245_4_VEC_INSERT]], 0 +; SROA-CPS-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; SROA-CPS-NEXT: [[DOTFCA_0_EXTRACT110:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT244]], 0 +; SROA-CPS-NEXT: [[DOTFCA_0_GEP111:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr 
[[TMP49]], i32 0, i32 0 +; SROA-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT110]], ptr [[DOTFCA_0_GEP111]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP112:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 +; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_LOAD113:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP112]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT114:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD113]], 0, 0, 0, 0 +; SROA-CPS-NEXT: [[DOTFCA_0_1_0_GEP115:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; SROA-CPS-NEXT: [[DOTFCA_0_1_0_LOAD116:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP115]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_1_0_INSERT117:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT114]], <3 x float> [[DOTFCA_0_1_0_LOAD116]], 0, 1, 0 +; SROA-CPS-NEXT: [[DOTFCA_0_1_1_GEP118:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 +; SROA-CPS-NEXT: [[DOTFCA_0_1_1_LOAD119:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP118]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_1_1_INSERT120:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT117]], <3 x float> [[DOTFCA_0_1_1_LOAD119]], 0, 1, 1 +; SROA-CPS-NEXT: [[DOTFCA_0_1_2_GEP121:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 +; SROA-CPS-NEXT: [[DOTFCA_0_1_2_LOAD122:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP121]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_1_2_INSERT123:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT120]], float [[DOTFCA_0_1_2_LOAD122]], 0, 1, 2 +; SROA-CPS-NEXT: [[DOTFCA_0_1_3_GEP124:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 +; SROA-CPS-NEXT: 
[[DOTFCA_0_1_3_LOAD125:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP124]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_1_3_INSERT126:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT123]], i32 [[DOTFCA_0_1_3_LOAD125]], 0, 1, 3 +; SROA-CPS-NEXT: [[DOTFCA_0_2_GEP127:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 +; SROA-CPS-NEXT: [[DOTFCA_0_2_LOAD128:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP127]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_2_INSERT129:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT126]], <3 x float> [[DOTFCA_0_2_LOAD128]], 0, 2 +; SROA-CPS-NEXT: [[DOTFCA_0_3_GEP130:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 +; SROA-CPS-NEXT: [[DOTFCA_0_3_LOAD131:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP130]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_3_INSERT132:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT129]], <3 x float> [[DOTFCA_0_3_LOAD131]], 0, 3 +; SROA-CPS-NEXT: [[DOTFCA_0_4_GEP133:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 +; SROA-CPS-NEXT: [[DOTFCA_0_4_LOAD134:%.*]] = load float, ptr [[DOTFCA_0_4_GEP133]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_4_INSERT135:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT132]], float [[DOTFCA_0_4_LOAD134]], 0, 4 +; SROA-CPS-NEXT: [[DOTFCA_0_5_GEP136:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 +; SROA-CPS-NEXT: [[DOTFCA_0_5_LOAD137:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP136]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_5_INSERT138:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT135]], i64 [[DOTFCA_0_5_LOAD137]], 0, 5 +; SROA-CPS-NEXT: [[DOTFCA_1_0_GEP139:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; 
SROA-CPS-NEXT: [[DOTFCA_1_0_LOAD140:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP139]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_1_0_INSERT141:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT138]], <3 x float> [[DOTFCA_1_0_LOAD140]], 1, 0 +; SROA-CPS-NEXT: [[DOTFCA_1_1_GEP142:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; SROA-CPS-NEXT: [[DOTFCA_1_1_LOAD143:%.*]] = load <3 x float>, ptr [[DOTFCA_1_1_GEP142]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_1_1_INSERT144:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT141]], <3 x float> [[DOTFCA_1_1_LOAD143]], 1, 1 +; SROA-CPS-NEXT: [[DOTFCA_1_2_GEP145:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 +; SROA-CPS-NEXT: [[DOTFCA_1_2_LOAD146:%.*]] = load float, ptr [[DOTFCA_1_2_GEP145]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_1_2_INSERT147:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT144]], float [[DOTFCA_1_2_LOAD146]], 1, 2 +; SROA-CPS-NEXT: [[DOTFCA_1_3_GEP148:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 +; SROA-CPS-NEXT: [[DOTFCA_1_3_LOAD149:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP148]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_1_3_INSERT150:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT147]], i32 [[DOTFCA_1_3_LOAD149]], 1, 3 +; SROA-CPS-NEXT: [[TMP50:%.*]] = add i32 [[CSPINIT]], 0 +; SROA-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 18, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT150]]) +; SROA-CPS-NEXT: unreachable +; SROA-CPS: 51: +; SROA-CPS-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; SROA-CPS-NEXT: call void @_cont_IgnoreHit(ptr [[TMP52]]) +; SROA-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT28:%.*]] = extractelement <4 x float> undef, i32 0 +; SROA-CPS-NEXT: [[TMP53:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT28]] to i32 +; SROA-CPS-NEXT: store i32 [[TMP53]], ptr addrspace(20) @REGISTERS, align 4 +; SROA-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT36:%.*]] = extractelement <4 x float> undef, i32 1 +; SROA-CPS-NEXT: [[TMP54:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT36]] to i32 +; SROA-CPS-NEXT: store i32 [[TMP54]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 +; SROA-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT44:%.*]] = extractelement <4 x float> undef, i32 2 +; SROA-CPS-NEXT: [[TMP55:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT44]] to i32 +; SROA-CPS-NEXT: store i32 [[TMP55]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 +; SROA-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT52:%.*]] = extractelement <4 x float> undef, i32 3 +; SROA-CPS-NEXT: [[TMP56:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT52]] to i32 +; SROA-CPS-NEXT: store i32 [[TMP56]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 +; SROA-CPS-NEXT: [[TMP57:%.*]] = 
bitcast i32 [[TMP4]] to float +; SROA-CPS-NEXT: [[DOTSROA_0249_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP57]], i32 0 +; SROA-CPS-NEXT: [[TMP58:%.*]] = bitcast i32 [[TMP5]] to float +; SROA-CPS-NEXT: [[DOTSROA_0249_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0249_0_VEC_INSERT]], float [[TMP58]], i32 1 +; SROA-CPS-NEXT: [[DOTFCA_0_INSERT248:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0249_4_VEC_INSERT]], 0 +; SROA-CPS-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; SROA-CPS-NEXT: [[DOTFCA_0_EXTRACT151:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT248]], 0 +; SROA-CPS-NEXT: [[DOTFCA_0_GEP152:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP59]], i32 0, i32 0 +; SROA-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT151]], ptr [[DOTFCA_0_GEP152]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP153:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 +; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_LOAD154:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP153]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT155:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD154]], 0, 0, 0, 0 +; SROA-CPS-NEXT: [[DOTFCA_0_1_0_GEP156:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; SROA-CPS-NEXT: [[DOTFCA_0_1_0_LOAD157:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP156]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_1_0_INSERT158:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT155]], <3 x float> [[DOTFCA_0_1_0_LOAD157]], 0, 1, 0 +; SROA-CPS-NEXT: [[DOTFCA_0_1_1_GEP159:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], 
i32 0, i32 0, i32 1, i32 1 +; SROA-CPS-NEXT: [[DOTFCA_0_1_1_LOAD160:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP159]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_1_1_INSERT161:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT158]], <3 x float> [[DOTFCA_0_1_1_LOAD160]], 0, 1, 1 +; SROA-CPS-NEXT: [[DOTFCA_0_1_2_GEP162:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 +; SROA-CPS-NEXT: [[DOTFCA_0_1_2_LOAD163:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP162]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_1_2_INSERT164:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT161]], float [[DOTFCA_0_1_2_LOAD163]], 0, 1, 2 +; SROA-CPS-NEXT: [[DOTFCA_0_1_3_GEP165:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 +; SROA-CPS-NEXT: [[DOTFCA_0_1_3_LOAD166:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP165]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_1_3_INSERT167:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT164]], i32 [[DOTFCA_0_1_3_LOAD166]], 0, 1, 3 +; SROA-CPS-NEXT: [[DOTFCA_0_2_GEP168:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 +; SROA-CPS-NEXT: [[DOTFCA_0_2_LOAD169:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP168]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_2_INSERT170:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT167]], <3 x float> [[DOTFCA_0_2_LOAD169]], 0, 2 +; SROA-CPS-NEXT: [[DOTFCA_0_3_GEP171:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 +; SROA-CPS-NEXT: [[DOTFCA_0_3_LOAD172:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP171]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_3_INSERT173:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT170]], <3 x float> [[DOTFCA_0_3_LOAD172]], 0, 3 +; SROA-CPS-NEXT: [[DOTFCA_0_4_GEP174:%.*]] = 
getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 +; SROA-CPS-NEXT: [[DOTFCA_0_4_LOAD175:%.*]] = load float, ptr [[DOTFCA_0_4_GEP174]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_4_INSERT176:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT173]], float [[DOTFCA_0_4_LOAD175]], 0, 4 +; SROA-CPS-NEXT: [[DOTFCA_0_5_GEP177:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 +; SROA-CPS-NEXT: [[DOTFCA_0_5_LOAD178:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP177]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_5_INSERT179:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT176]], i64 [[DOTFCA_0_5_LOAD178]], 0, 5 +; SROA-CPS-NEXT: [[DOTFCA_1_0_GEP180:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; SROA-CPS-NEXT: [[DOTFCA_1_0_LOAD181:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP180]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_1_0_INSERT182:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT179]], <3 x float> [[DOTFCA_1_0_LOAD181]], 1, 0 +; SROA-CPS-NEXT: [[DOTFCA_1_1_GEP183:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; SROA-CPS-NEXT: [[DOTFCA_1_1_LOAD184:%.*]] = load <3 x float>, ptr [[DOTFCA_1_1_GEP183]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_1_1_INSERT185:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT182]], <3 x float> [[DOTFCA_1_1_LOAD184]], 1, 1 +; SROA-CPS-NEXT: [[DOTFCA_1_2_GEP186:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 +; SROA-CPS-NEXT: [[DOTFCA_1_2_LOAD187:%.*]] = load float, ptr [[DOTFCA_1_2_GEP186]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_1_2_INSERT188:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT185]], float [[DOTFCA_1_2_LOAD187]], 1, 2 +; SROA-CPS-NEXT: [[DOTFCA_1_3_GEP189:%.*]] = 
getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 +; SROA-CPS-NEXT: [[DOTFCA_1_3_LOAD190:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP189]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_1_3_INSERT191:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT188]], i32 [[DOTFCA_1_3_LOAD190]], 1, 3 +; SROA-CPS-NEXT: [[TMP60:%.*]] = add i32 [[CSPINIT]], 0 +; SROA-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 18, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT191]]) +; SROA-CPS-NEXT: unreachable +; SROA-CPS: 61: +; SROA-CPS-NEXT: call void @_cont_AcceptHit(ptr [[SYSTEM_DATA_ALLOCA]]) +; SROA-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT30:%.*]] = extractelement <4 x float> undef, i32 0 +; SROA-CPS-NEXT: [[TMP62:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT30]] to i32 +; SROA-CPS-NEXT: store i32 [[TMP62]], ptr addrspace(20) @REGISTERS, align 4 +; SROA-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT38:%.*]] = extractelement <4 x float> undef, i32 1 +; SROA-CPS-NEXT: [[TMP63:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT38]] to i32 +; SROA-CPS-NEXT: store i32 [[TMP63]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 +; SROA-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT46:%.*]] = extractelement <4 x float> undef, i32 2 +; SROA-CPS-NEXT: [[TMP64:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT46]] to i32 +; SROA-CPS-NEXT: store i32 [[TMP64]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 +; SROA-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT54:%.*]] = extractelement <4 x float> undef, i32 3 +; 
SROA-CPS-NEXT: [[TMP65:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT54]] to i32 +; SROA-CPS-NEXT: store i32 [[TMP65]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 +; SROA-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT15:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 +; SROA-CPS-NEXT: [[TMP66:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT15]] to i32 +; SROA-CPS-NEXT: [[TMP67:%.*]] = bitcast i32 [[TMP66]] to float +; SROA-CPS-NEXT: [[DOTSROA_0253_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP67]], i32 0 +; SROA-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT19:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 +; SROA-CPS-NEXT: [[TMP68:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT19]] to i32 +; SROA-CPS-NEXT: [[TMP69:%.*]] = bitcast i32 [[TMP68]] to float +; SROA-CPS-NEXT: [[DOTSROA_0253_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0253_0_VEC_INSERT]], float [[TMP69]], i32 1 +; SROA-CPS-NEXT: [[DOTFCA_0_INSERT252:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0253_4_VEC_INSERT]], 0 +; SROA-CPS-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; SROA-CPS-NEXT: [[DOTFCA_0_EXTRACT192:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT252]], 0 +; SROA-CPS-NEXT: [[DOTFCA_0_GEP193:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP70]], i32 0, i32 0 +; SROA-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT192]], ptr [[DOTFCA_0_GEP193]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP194:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, 
i32 0, i32 0, i32 0 +; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_LOAD195:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP194]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT196:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD195]], 0, 0, 0, 0 +; SROA-CPS-NEXT: [[DOTFCA_0_1_0_GEP197:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; SROA-CPS-NEXT: [[DOTFCA_0_1_0_LOAD198:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP197]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_1_0_INSERT199:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT196]], <3 x float> [[DOTFCA_0_1_0_LOAD198]], 0, 1, 0 +; SROA-CPS-NEXT: [[DOTFCA_0_1_1_GEP200:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 +; SROA-CPS-NEXT: [[DOTFCA_0_1_1_LOAD201:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP200]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_1_1_INSERT202:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT199]], <3 x float> [[DOTFCA_0_1_1_LOAD201]], 0, 1, 1 +; SROA-CPS-NEXT: [[DOTFCA_0_1_2_GEP203:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 +; SROA-CPS-NEXT: [[DOTFCA_0_1_2_LOAD204:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP203]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_1_2_INSERT205:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT202]], float [[DOTFCA_0_1_2_LOAD204]], 0, 1, 2 +; SROA-CPS-NEXT: [[DOTFCA_0_1_3_GEP206:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 +; SROA-CPS-NEXT: [[DOTFCA_0_1_3_LOAD207:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP206]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_1_3_INSERT208:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT205]], i32 [[DOTFCA_0_1_3_LOAD207]], 0, 1, 3 +; SROA-CPS-NEXT: 
[[DOTFCA_0_2_GEP209:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 +; SROA-CPS-NEXT: [[DOTFCA_0_2_LOAD210:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP209]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_2_INSERT211:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT208]], <3 x float> [[DOTFCA_0_2_LOAD210]], 0, 2 +; SROA-CPS-NEXT: [[DOTFCA_0_3_GEP212:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 +; SROA-CPS-NEXT: [[DOTFCA_0_3_LOAD213:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP212]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_3_INSERT214:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT211]], <3 x float> [[DOTFCA_0_3_LOAD213]], 0, 3 +; SROA-CPS-NEXT: [[DOTFCA_0_4_GEP215:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 +; SROA-CPS-NEXT: [[DOTFCA_0_4_LOAD216:%.*]] = load float, ptr [[DOTFCA_0_4_GEP215]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_4_INSERT217:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT214]], float [[DOTFCA_0_4_LOAD216]], 0, 4 +; SROA-CPS-NEXT: [[DOTFCA_0_5_GEP218:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 +; SROA-CPS-NEXT: [[DOTFCA_0_5_LOAD219:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP218]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_5_INSERT220:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT217]], i64 [[DOTFCA_0_5_LOAD219]], 0, 5 +; SROA-CPS-NEXT: [[DOTFCA_1_0_GEP221:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; SROA-CPS-NEXT: [[DOTFCA_1_0_LOAD222:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP221]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_1_0_INSERT223:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT220]], <3 x float> [[DOTFCA_1_0_LOAD222]], 1, 0 +; 
SROA-CPS-NEXT: [[DOTFCA_1_1_GEP224:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; SROA-CPS-NEXT: [[DOTFCA_1_1_LOAD225:%.*]] = load <3 x float>, ptr [[DOTFCA_1_1_GEP224]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_1_1_INSERT226:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT223]], <3 x float> [[DOTFCA_1_1_LOAD225]], 1, 1 +; SROA-CPS-NEXT: [[DOTFCA_1_2_GEP227:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 +; SROA-CPS-NEXT: [[DOTFCA_1_2_LOAD228:%.*]] = load float, ptr [[DOTFCA_1_2_GEP227]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_1_2_INSERT229:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT226]], float [[DOTFCA_1_2_LOAD228]], 1, 2 +; SROA-CPS-NEXT: [[DOTFCA_1_3_GEP230:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 +; SROA-CPS-NEXT: [[DOTFCA_1_3_LOAD231:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP230]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_1_3_INSERT232:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT229]], i32 [[DOTFCA_1_3_LOAD231]], 1, 3 +; SROA-CPS-NEXT: [[TMP71:%.*]] = add i32 [[CSPINIT]], 0 +; SROA-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 18, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT232]]) +; SROA-CPS-NEXT: unreachable +; +; +; SROA-CPS-LABEL: define void @MyIntersectionShader( +; SROA-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[CSPINIT:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META37]] !lgc.cps [[META41:![0-9]+]] !continuation [[META42:![0-9]+]] { +; SROA-CPS-NEXT: AllocaSpillBB: +; SROA-CPS-NEXT: [[TMP1:%.*]] = add i32 [[CSPINIT]], 8 +; SROA-CPS-NEXT: [[TMP2:%.*]] = inttoptr i32 [[CSPINIT]] to ptr addrspace(21) +; SROA-CPS-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP2]], i32 0 +; SROA-CPS-NEXT: store i32 [[RETURN_ADDR]], ptr addrspace(21) [[TMP3]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 0, 0, 0 +; SROA-CPS-NEXT: [[DOTFCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 0 +; SROA-CPS-NEXT: [[DOTFCA_0_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 1 +; SROA-CPS-NEXT: [[DOTFCA_0_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 2 +; SROA-CPS-NEXT: [[DOTFCA_0_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 3 +; SROA-CPS-NEXT: [[DOTFCA_0_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 2 +; SROA-CPS-NEXT: [[DOTFCA_0_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 3 +; SROA-CPS-NEXT: [[DOTFCA_0_4_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 4 +; SROA-CPS-NEXT: [[DOTFCA_0_5_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 5 +; SROA-CPS-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 0 +; SROA-CPS-NEXT: [[DOTFCA_1_1_EXTRACT:%.*]] = extractvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 1 +; SROA-CPS-NEXT: [[DOTFCA_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 2 +; SROA-CPS-NEXT: [[DOTFCA_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 3 +; SROA-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) +; SROA-CPS-NEXT: [[TMP4:%.*]] = bitcast <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]] to <3 x float> +; SROA-CPS-NEXT: [[RES_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA:%.*]] poison, <3 x float> [[TMP4]], 0 +; SROA-CPS-NEXT: [[RES_I_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 1 +; SROA-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_13_24_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[DOTFCA_0_1_1_EXTRACT]], i32 0 +; SROA-CPS-NEXT: [[RES_I_FCA_2_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_1_INSERT]], float [[SYSTEM_DATA_ALLOCA_SROA_13_24_VEC_EXTRACT]], 2 +; SROA-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_13_28_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[DOTFCA_0_1_1_EXTRACT]], i32 1 +; SROA-CPS-NEXT: [[TMP5:%.*]] = bitcast float [[SYSTEM_DATA_ALLOCA_SROA_13_28_VEC_EXTRACT]] to i32 +; SROA-CPS-NEXT: [[RES_I_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_2_INSERT]], i32 [[TMP5]], 3 +; SROA-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 0 +; SROA-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 1 +; SROA-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 2 +; SROA-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 3 +; SROA-CPS-NEXT: [[ISNOHIT_I:%.*]] = fcmp fast uge float [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT]], [[DOTFCA_0_4_EXTRACT]] +; SROA-CPS-NEXT: br i1 [[ISNOHIT_I]], label [[ISEND_I:%.*]], label [[CALLAHIT_I:%.*]] 
+; SROA-CPS: callAHit.i: +; SROA-CPS-NEXT: [[TRAV_DATA_I_FCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]], 0, 0, 0, 0 +; SROA-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 +; SROA-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 +; SROA-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 +; SROA-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 +; SROA-CPS-NEXT: [[TRAV_DATA_I_FCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 +; SROA-CPS-NEXT: [[TRAV_DATA_I_FCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 +; SROA-CPS-NEXT: [[TRAV_DATA_I_FCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_3_INSERT]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 +; SROA-CPS-NEXT: [[TRAV_DATA_I_FCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_4_INSERT]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 +; SROA-CPS-NEXT: [[TRAV_DATA_I_FCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 +; SROA-CPS-NEXT: [[TRAV_DATA_I_FCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 +; SROA-CPS-NEXT: [[TRAV_DATA_I_FCA_1_2_INSERT:%.*]] = insertvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 +; SROA-CPS-NEXT: [[TRAV_DATA_I_FCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 +; SROA-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> undef, 0 +; SROA-CPS-NEXT: [[TMP6:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference(ptr @MyIntersectionShader.resume.0) +; SROA-CPS-NEXT: call void (...) @lgc.cps.jump(i32 3, i32 8, {} poison, i32 [[TMP6]], i32 5, float [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT]]), !continuation.returnedRegistercount !32, !continuation.registercount [[META32:![0-9]+]] +; SROA-CPS-NEXT: unreachable +; SROA-CPS: isEnd.i: +; SROA-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 0 +; SROA-CPS-NEXT: [[TMP7:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; SROA-CPS-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP7]] to float +; SROA-CPS-NEXT: [[DOTSROA_0107_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP8]], i32 0 +; SROA-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 1 +; SROA-CPS-NEXT: [[TMP9:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; SROA-CPS-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP9]] to float +; SROA-CPS-NEXT: [[DOTSROA_0107_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0107_0_VEC_INSERT]], float [[TMP10]], i32 1 +; SROA-CPS-NEXT: [[DOTFCA_0_INSERT106:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0107_4_VEC_INSERT]], 0 +; SROA-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT106]], 0 +; SROA-CPS-NEXT: [[TMP11:%.*]] = bitcast <2 x float> [[DOTFCA_0_EXTRACT]] to <2 x i32> +; SROA-CPS-NEXT: 
[[SYSTEM_DATA_ALLOCA_SROA_0_0_VEC_EXPAND:%.*]] = shufflevector <2 x i32> [[TMP11]], <2 x i32> poison, <3 x i32> +; SROA-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND:%.*]] = select <3 x i1> , <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VEC_EXPAND]], <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]] +; SROA-CPS-NEXT: [[ISEND_I1:%.*]] = call i1 @opaqueIsEnd() +; SROA-CPS-NEXT: br i1 [[ISEND_I1]], label [[TMP12:%.*]], label [[TMP14:%.*]] +; SROA-CPS: 12: +; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT44:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND]], 0, 0, 0, 0 +; SROA-CPS-NEXT: [[DOTFCA_0_1_0_INSERT47:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT44]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 +; SROA-CPS-NEXT: [[DOTFCA_0_1_1_INSERT50:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT47]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 +; SROA-CPS-NEXT: [[DOTFCA_0_1_2_INSERT53:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT50]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 +; SROA-CPS-NEXT: [[DOTFCA_0_1_3_INSERT56:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT53]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 +; SROA-CPS-NEXT: [[DOTFCA_0_2_INSERT59:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT56]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 +; SROA-CPS-NEXT: [[DOTFCA_0_3_INSERT62:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT59]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 +; SROA-CPS-NEXT: [[DOTFCA_0_4_INSERT65:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT62]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 +; SROA-CPS-NEXT: [[DOTFCA_0_5_INSERT68:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT65]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 +; SROA-CPS-NEXT: [[DOTFCA_1_0_INSERT71:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT68]], <3 x 
float> [[DOTFCA_1_0_EXTRACT]], 1, 0 +; SROA-CPS-NEXT: [[DOTFCA_1_1_INSERT74:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT71]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 +; SROA-CPS-NEXT: [[DOTFCA_1_2_INSERT77:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT74]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 +; SROA-CPS-NEXT: [[DOTFCA_1_3_INSERT80:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT77]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 +; SROA-CPS-NEXT: [[TMP13:%.*]] = add i32 [[TMP1]], -8 +; SROA-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT80]]), !continuation.registercount [[META32]] +; SROA-CPS-NEXT: unreachable +; SROA-CPS: 14: +; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND]], 0, 0, 0, 0 +; SROA-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 +; SROA-CPS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 +; SROA-CPS-NEXT: [[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 +; SROA-CPS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 +; SROA-CPS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 +; SROA-CPS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 +; SROA-CPS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] 
[[DOTFCA_0_3_INSERT]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 +; SROA-CPS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 +; SROA-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 +; SROA-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 +; SROA-CPS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 +; SROA-CPS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 +; SROA-CPS-NEXT: [[TMP15:%.*]] = add i32 [[TMP1]], -8 +; SROA-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]]) +; SROA-CPS-NEXT: unreachable +; +; +; SROA-CPS-LABEL: define dso_local void @MyIntersectionShader.resume.0( +; SROA-CPS-SAME: {} [[TMP0:%.*]], i32 [[CSPINIT:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP3:%.*]]) !lgc.rt.shaderstage [[META37]] !lgc.cps [[META41]] !continuation [[META42]] { +; SROA-CPS-NEXT: entryresume.0: +; SROA-CPS-NEXT: [[TMP4:%.*]] = add i32 [[CSPINIT]], -8 +; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT16:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 0, 0, 0 +; SROA-CPS-NEXT: [[DOTFCA_0_1_0_EXTRACT18:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 1, 0 +; SROA-CPS-NEXT: [[DOTFCA_0_1_1_EXTRACT20:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 1, 1 +; SROA-CPS-NEXT: [[DOTFCA_0_1_2_EXTRACT22:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 1, 2 +; SROA-CPS-NEXT: [[DOTFCA_0_1_3_EXTRACT24:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] 
[[TMP3]], 0, 1, 3 +; SROA-CPS-NEXT: [[DOTFCA_0_2_EXTRACT26:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 2 +; SROA-CPS-NEXT: [[DOTFCA_0_3_EXTRACT28:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 3 +; SROA-CPS-NEXT: [[DOTFCA_0_4_EXTRACT30:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 4 +; SROA-CPS-NEXT: [[DOTFCA_0_5_EXTRACT32:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 5 +; SROA-CPS-NEXT: [[DOTFCA_1_0_EXTRACT34:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 1, 0 +; SROA-CPS-NEXT: [[DOTFCA_1_1_EXTRACT36:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 1, 1 +; SROA-CPS-NEXT: [[DOTFCA_1_2_EXTRACT38:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 1, 2 +; SROA-CPS-NEXT: [[DOTFCA_1_3_EXTRACT40:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 1, 3 +; SROA-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; SROA-CPS-NEXT: [[ISEND_I1:%.*]] = call i1 @opaqueIsEnd() +; SROA-CPS-NEXT: br i1 [[ISEND_I1]], label [[TMP5:%.*]], label [[TMP9:%.*]] +; SROA-CPS: 5: +; SROA-CPS-NEXT: [[TMP6:%.*]] = inttoptr i32 [[TMP4]] to ptr addrspace(21) +; SROA-CPS-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP6]], i32 0 +; SROA-CPS-NEXT: [[RETURN_ADDR_RELOAD2:%.*]] = load i32, ptr addrspace(21) [[TMP7]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT44:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT16]], 0, 0, 0, 0 +; SROA-CPS-NEXT: [[DOTFCA_0_1_0_INSERT47:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT44]], <3 x float> [[DOTFCA_0_1_0_EXTRACT18]], 0, 1, 0 +; SROA-CPS-NEXT: [[DOTFCA_0_1_1_INSERT50:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT47]], <3 x float> [[DOTFCA_0_1_1_EXTRACT20]], 0, 1, 1 +; SROA-CPS-NEXT: [[DOTFCA_0_1_2_INSERT53:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT50]], float [[DOTFCA_0_1_2_EXTRACT22]], 0, 
1, 2 +; SROA-CPS-NEXT: [[DOTFCA_0_1_3_INSERT56:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT53]], i32 [[DOTFCA_0_1_3_EXTRACT24]], 0, 1, 3 +; SROA-CPS-NEXT: [[DOTFCA_0_2_INSERT59:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT56]], <3 x float> [[DOTFCA_0_2_EXTRACT26]], 0, 2 +; SROA-CPS-NEXT: [[DOTFCA_0_3_INSERT62:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT59]], <3 x float> [[DOTFCA_0_3_EXTRACT28]], 0, 3 +; SROA-CPS-NEXT: [[DOTFCA_0_4_INSERT65:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT62]], float [[DOTFCA_0_4_EXTRACT30]], 0, 4 +; SROA-CPS-NEXT: [[DOTFCA_0_5_INSERT68:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT65]], i64 [[DOTFCA_0_5_EXTRACT32]], 0, 5 +; SROA-CPS-NEXT: [[DOTFCA_1_0_INSERT71:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT68]], <3 x float> [[DOTFCA_1_0_EXTRACT34]], 1, 0 +; SROA-CPS-NEXT: [[DOTFCA_1_1_INSERT74:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT71]], <3 x float> [[DOTFCA_1_1_EXTRACT36]], 1, 1 +; SROA-CPS-NEXT: [[DOTFCA_1_2_INSERT77:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT74]], float [[DOTFCA_1_2_EXTRACT38]], 1, 2 +; SROA-CPS-NEXT: [[DOTFCA_1_3_INSERT80:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT77]], i32 [[DOTFCA_1_3_EXTRACT40]], 1, 3 +; SROA-CPS-NEXT: [[TMP8:%.*]] = add i32 [[CSPINIT]], -8 +; SROA-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR_RELOAD2]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT80]]), !continuation.registercount [[META32]] +; SROA-CPS-NEXT: unreachable +; SROA-CPS: 9: +; SROA-CPS-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP4]] to ptr addrspace(21) +; SROA-CPS-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP10]], i32 0 +; SROA-CPS-NEXT: [[RETURN_ADDR_RELOAD:%.*]] = load i32, ptr addrspace(21) [[TMP11]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT16]], 0, 0, 0, 0 +; SROA-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT18]], 0, 1, 0 +; SROA-CPS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_EXTRACT20]], 0, 1, 1 +; SROA-CPS-NEXT: [[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_EXTRACT22]], 0, 1, 2 +; SROA-CPS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_EXTRACT24]], 0, 1, 3 +; SROA-CPS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_EXTRACT26]], 0, 2 +; SROA-CPS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_EXTRACT28]], 0, 3 +; SROA-CPS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], float [[DOTFCA_0_4_EXTRACT30]], 0, 4 +; SROA-CPS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 [[DOTFCA_0_5_EXTRACT32]], 0, 5 +; SROA-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] 
[[DOTFCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_EXTRACT34]], 1, 0 +; SROA-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT36]], 1, 1 +; SROA-CPS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT38]], 1, 2 +; SROA-CPS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT40]], 1, 3 +; SROA-CPS-NEXT: [[TMP12:%.*]] = add i32 [[CSPINIT]], -8 +; SROA-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR_RELOAD]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]]) +; SROA-CPS-NEXT: unreachable +; +; +; SROA-CPS-LABEL: define void @MyIntersectionShader2( +; SROA-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[CSPINIT:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META37]] !lgc.cps [[META41]] !continuation [[META43:![0-9]+]] { +; SROA-CPS-NEXT: AllocaSpillBB: +; SROA-CPS-NEXT: [[TMP1:%.*]] = add i32 [[CSPINIT]], 8 +; SROA-CPS-NEXT: [[TMP2:%.*]] = inttoptr i32 [[CSPINIT]] to ptr addrspace(21) +; SROA-CPS-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP2]], i32 0 +; SROA-CPS-NEXT: store i32 [[RETURN_ADDR]], ptr addrspace(21) [[TMP3]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 0, 0, 0 +; SROA-CPS-NEXT: [[DOTFCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 0 +; SROA-CPS-NEXT: [[DOTFCA_0_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 1 +; SROA-CPS-NEXT: [[DOTFCA_0_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 2 +; SROA-CPS-NEXT: [[DOTFCA_0_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 3 +; 
SROA-CPS-NEXT: [[DOTFCA_0_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 2 +; SROA-CPS-NEXT: [[DOTFCA_0_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 3 +; SROA-CPS-NEXT: [[DOTFCA_0_4_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 4 +; SROA-CPS-NEXT: [[DOTFCA_0_5_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 5 +; SROA-CPS-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 0 +; SROA-CPS-NEXT: [[DOTFCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 1 +; SROA-CPS-NEXT: [[DOTFCA_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 2 +; SROA-CPS-NEXT: [[DOTFCA_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 3 +; SROA-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) +; SROA-CPS-NEXT: [[TMP4:%.*]] = bitcast <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]] to <3 x float> +; SROA-CPS-NEXT: [[RES_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA:%.*]] poison, <3 x float> [[TMP4]], 0 +; SROA-CPS-NEXT: [[RES_I_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 1 +; SROA-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_13_24_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[DOTFCA_0_1_1_EXTRACT]], i32 0 +; SROA-CPS-NEXT: [[RES_I_FCA_2_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_1_INSERT]], float [[SYSTEM_DATA_ALLOCA_SROA_13_24_VEC_EXTRACT]], 2 +; SROA-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_13_28_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[DOTFCA_0_1_1_EXTRACT]], i32 1 +; SROA-CPS-NEXT: [[TMP5:%.*]] = bitcast float [[SYSTEM_DATA_ALLOCA_SROA_13_28_VEC_EXTRACT]] to i32 +; SROA-CPS-NEXT: [[RES_I_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_2_INSERT]], i32 [[TMP5]], 3 +; SROA-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] 
[[RES_I_FCA_3_INSERT]], 0 +; SROA-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 1 +; SROA-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 2 +; SROA-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 3 +; SROA-CPS-NEXT: [[ISNOHIT_I:%.*]] = fcmp fast uge float [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT]], [[DOTFCA_0_4_EXTRACT]] +; SROA-CPS-NEXT: br i1 [[ISNOHIT_I]], label [[ISEND_I:%.*]], label [[CALLAHIT_I:%.*]] +; SROA-CPS: callAHit.i: +; SROA-CPS-NEXT: [[TRAV_DATA_I_FCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]], 0, 0, 0, 0 +; SROA-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 +; SROA-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 +; SROA-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 +; SROA-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 +; SROA-CPS-NEXT: [[TRAV_DATA_I_FCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 +; SROA-CPS-NEXT: [[TRAV_DATA_I_FCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 +; SROA-CPS-NEXT: [[TRAV_DATA_I_FCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_3_INSERT]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 +; 
SROA-CPS-NEXT: [[TRAV_DATA_I_FCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_4_INSERT]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 +; SROA-CPS-NEXT: [[TRAV_DATA_I_FCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 +; SROA-CPS-NEXT: [[TRAV_DATA_I_FCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 +; SROA-CPS-NEXT: [[TRAV_DATA_I_FCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 +; SROA-CPS-NEXT: [[TRAV_DATA_I_FCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 +; SROA-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2:%.*]] poison, <2 x float> undef, 0 +; SROA-CPS-NEXT: [[TMP6:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference(ptr @MyIntersectionShader2.resume.0) +; SROA-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 3, i32 8, {} poison, i32 [[TMP6]], i32 5, float [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2]] [[DOTFCA_0_INSERT]]), !continuation.returnedRegistercount !32, !continuation.registercount [[META32]] +; SROA-CPS-NEXT: unreachable +; SROA-CPS: isEnd.i: +; SROA-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 0 +; SROA-CPS-NEXT: [[TMP7:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; SROA-CPS-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP7]] to float +; SROA-CPS-NEXT: [[DOTSROA_0107_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP8]], i32 0 +; SROA-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 1 +; SROA-CPS-NEXT: [[TMP9:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; SROA-CPS-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP9]] to float +; SROA-CPS-NEXT: [[DOTSROA_0107_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0107_0_VEC_INSERT]], float [[TMP10]], i32 1 +; SROA-CPS-NEXT: [[DOTFCA_0_INSERT106:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> [[DOTSROA_0107_4_VEC_INSERT]], 0 +; SROA-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT106]], 0 +; SROA-CPS-NEXT: [[TMP11:%.*]] = bitcast <2 x float> [[DOTFCA_0_EXTRACT]] to <2 x i32> +; SROA-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_0_0_VEC_EXPAND:%.*]] = shufflevector <2 x i32> [[TMP11]], <2 x i32> poison, <3 x i32> +; SROA-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND:%.*]] = select <3 x i1> , <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VEC_EXPAND]], <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]] +; SROA-CPS-NEXT: [[ISEND_I1:%.*]] = call i1 @opaqueIsEnd() +; SROA-CPS-NEXT: br i1 [[ISEND_I1]], label [[TMP12:%.*]], label [[TMP14:%.*]] +; SROA-CPS: 12: +; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT44:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> 
[[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND]], 0, 0, 0, 0 +; SROA-CPS-NEXT: [[DOTFCA_0_1_0_INSERT47:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT44]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 +; SROA-CPS-NEXT: [[DOTFCA_0_1_1_INSERT50:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT47]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 +; SROA-CPS-NEXT: [[DOTFCA_0_1_2_INSERT53:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT50]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 +; SROA-CPS-NEXT: [[DOTFCA_0_1_3_INSERT56:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT53]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 +; SROA-CPS-NEXT: [[DOTFCA_0_2_INSERT59:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT56]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 +; SROA-CPS-NEXT: [[DOTFCA_0_3_INSERT62:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT59]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 +; SROA-CPS-NEXT: [[DOTFCA_0_4_INSERT65:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT62]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 +; SROA-CPS-NEXT: [[DOTFCA_0_5_INSERT68:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT65]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 +; SROA-CPS-NEXT: [[DOTFCA_1_0_INSERT71:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT68]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 +; SROA-CPS-NEXT: [[DOTFCA_1_1_INSERT74:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT71]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 +; SROA-CPS-NEXT: [[DOTFCA_1_2_INSERT77:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT74]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 +; SROA-CPS-NEXT: [[DOTFCA_1_3_INSERT80:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT77]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 +; SROA-CPS-NEXT: [[TMP13:%.*]] = add i32 [[TMP1]], -8 +; 
SROA-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT80]]), !continuation.registercount [[META32]] +; SROA-CPS-NEXT: unreachable +; SROA-CPS: 14: +; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND]], 0, 0, 0, 0 +; SROA-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 +; SROA-CPS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 +; SROA-CPS-NEXT: [[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 +; SROA-CPS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 +; SROA-CPS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 +; SROA-CPS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 +; SROA-CPS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 +; SROA-CPS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 +; SROA-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 +; SROA-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 +; SROA-CPS-NEXT: 
[[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 +; SROA-CPS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 +; SROA-CPS-NEXT: [[TMP15:%.*]] = add i32 [[TMP1]], -8 +; SROA-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]]) +; SROA-CPS-NEXT: unreachable +; +; +; SROA-CPS-LABEL: define dso_local void @MyIntersectionShader2.resume.0( +; SROA-CPS-SAME: {} [[TMP0:%.*]], i32 [[CSPINIT:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP3:%.*]]) !lgc.rt.shaderstage [[META37]] !lgc.cps [[META41]] !continuation [[META43]] { +; SROA-CPS-NEXT: entryresume.0: +; SROA-CPS-NEXT: [[TMP4:%.*]] = add i32 [[CSPINIT]], -8 +; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT16:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 0, 0, 0 +; SROA-CPS-NEXT: [[DOTFCA_0_1_0_EXTRACT18:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 1, 0 +; SROA-CPS-NEXT: [[DOTFCA_0_1_1_EXTRACT20:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 1, 1 +; SROA-CPS-NEXT: [[DOTFCA_0_1_2_EXTRACT22:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 1, 2 +; SROA-CPS-NEXT: [[DOTFCA_0_1_3_EXTRACT24:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 1, 3 +; SROA-CPS-NEXT: [[DOTFCA_0_2_EXTRACT26:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 2 +; SROA-CPS-NEXT: [[DOTFCA_0_3_EXTRACT28:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 3 +; SROA-CPS-NEXT: [[DOTFCA_0_4_EXTRACT30:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 4 +; SROA-CPS-NEXT: [[DOTFCA_0_5_EXTRACT32:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 5 +; SROA-CPS-NEXT: [[DOTFCA_1_0_EXTRACT34:%.*]] = extractvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 1, 0 +; SROA-CPS-NEXT: [[DOTFCA_1_1_EXTRACT36:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 1, 1 +; SROA-CPS-NEXT: [[DOTFCA_1_2_EXTRACT38:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 1, 2 +; SROA-CPS-NEXT: [[DOTFCA_1_3_EXTRACT40:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 1, 3 +; SROA-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; SROA-CPS-NEXT: [[ISEND_I1:%.*]] = call i1 @opaqueIsEnd() +; SROA-CPS-NEXT: br i1 [[ISEND_I1]], label [[TMP5:%.*]], label [[TMP9:%.*]] +; SROA-CPS: 5: +; SROA-CPS-NEXT: [[TMP6:%.*]] = inttoptr i32 [[TMP4]] to ptr addrspace(21) +; SROA-CPS-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP6]], i32 0 +; SROA-CPS-NEXT: [[RETURN_ADDR_RELOAD2:%.*]] = load i32, ptr addrspace(21) [[TMP7]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT44:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT16]], 0, 0, 0, 0 +; SROA-CPS-NEXT: [[DOTFCA_0_1_0_INSERT47:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT44]], <3 x float> [[DOTFCA_0_1_0_EXTRACT18]], 0, 1, 0 +; SROA-CPS-NEXT: [[DOTFCA_0_1_1_INSERT50:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT47]], <3 x float> [[DOTFCA_0_1_1_EXTRACT20]], 0, 1, 1 +; SROA-CPS-NEXT: [[DOTFCA_0_1_2_INSERT53:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT50]], float [[DOTFCA_0_1_2_EXTRACT22]], 0, 1, 2 +; SROA-CPS-NEXT: [[DOTFCA_0_1_3_INSERT56:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT53]], i32 [[DOTFCA_0_1_3_EXTRACT24]], 0, 1, 3 +; SROA-CPS-NEXT: [[DOTFCA_0_2_INSERT59:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT56]], <3 x float> [[DOTFCA_0_2_EXTRACT26]], 0, 2 +; SROA-CPS-NEXT: [[DOTFCA_0_3_INSERT62:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT59]], <3 x float> [[DOTFCA_0_3_EXTRACT28]], 0, 3 +; SROA-CPS-NEXT: 
[[DOTFCA_0_4_INSERT65:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT62]], float [[DOTFCA_0_4_EXTRACT30]], 0, 4 +; SROA-CPS-NEXT: [[DOTFCA_0_5_INSERT68:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT65]], i64 [[DOTFCA_0_5_EXTRACT32]], 0, 5 +; SROA-CPS-NEXT: [[DOTFCA_1_0_INSERT71:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT68]], <3 x float> [[DOTFCA_1_0_EXTRACT34]], 1, 0 +; SROA-CPS-NEXT: [[DOTFCA_1_1_INSERT74:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT71]], <3 x float> [[DOTFCA_1_1_EXTRACT36]], 1, 1 +; SROA-CPS-NEXT: [[DOTFCA_1_2_INSERT77:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT74]], float [[DOTFCA_1_2_EXTRACT38]], 1, 2 +; SROA-CPS-NEXT: [[DOTFCA_1_3_INSERT80:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT77]], i32 [[DOTFCA_1_3_EXTRACT40]], 1, 3 +; SROA-CPS-NEXT: [[TMP8:%.*]] = add i32 [[CSPINIT]], -8 +; SROA-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR_RELOAD2]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT80]]), !continuation.registercount [[META32]] +; SROA-CPS-NEXT: unreachable +; SROA-CPS: 9: +; SROA-CPS-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP4]] to ptr addrspace(21) +; SROA-CPS-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP10]], i32 0 +; SROA-CPS-NEXT: [[RETURN_ADDR_RELOAD:%.*]] = load i32, ptr addrspace(21) [[TMP11]], align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT16]], 0, 0, 0, 0 +; SROA-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT18]], 0, 1, 0 +; SROA-CPS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_EXTRACT20]], 0, 1, 1 +; SROA-CPS-NEXT: 
[[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_EXTRACT22]], 0, 1, 2 +; SROA-CPS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_EXTRACT24]], 0, 1, 3 +; SROA-CPS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_EXTRACT26]], 0, 2 +; SROA-CPS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_EXTRACT28]], 0, 3 +; SROA-CPS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], float [[DOTFCA_0_4_EXTRACT30]], 0, 4 +; SROA-CPS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 [[DOTFCA_0_5_EXTRACT32]], 0, 5 +; SROA-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_EXTRACT34]], 1, 0 +; SROA-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT36]], 1, 1 +; SROA-CPS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT38]], 1, 2 +; SROA-CPS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT40]], 1, 3 +; SROA-CPS-NEXT: [[TMP12:%.*]] = add i32 [[CSPINIT]], -8 +; SROA-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR_RELOAD]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]]) +; SROA-CPS-NEXT: unreachable +; +; +; SROA-CPS-LABEL: define void @MyMissShader( +; SROA-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[CSPINIT:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META41]] !lgc.cps [[META37]] !continuation [[META44:![0-9]+]] { +; SROA-CPS-NEXT: AllocaSpillBB: +; SROA-CPS-NEXT: [[DOTFCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 0, 0 +; SROA-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) +; SROA-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 0 +; SROA-CPS-NEXT: [[TMP1:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; SROA-CPS-NEXT: store i32 [[TMP1]], ptr addrspace(20) @REGISTERS, align 4 +; SROA-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 1 +; SROA-CPS-NEXT: [[TMP2:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; SROA-CPS-NEXT: store i32 [[TMP2]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 +; SROA-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 2 +; SROA-CPS-NEXT: [[TMP3:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 +; SROA-CPS-NEXT: store i32 [[TMP3]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 +; SROA-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 3 +; SROA-CPS-NEXT: [[TMP4:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 +; SROA-CPS-NEXT: store i32 [[TMP4]], ptr 
addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 +; SROA-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison, <3 x i32> [[DOTFCA_0_0_EXTRACT]], 0 +; SROA-CPS-NEXT: [[TMP5:%.*]] = add i32 [[CSPINIT]], 0 +; SROA-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 6, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]) +; SROA-CPS-NEXT: unreachable +; diff --git a/shared/continuations/test/dx/paq-hit-attribute-size.ll b/llvmraytracing/test/dx/paq-hit-attribute-size.ll similarity index 98% rename from shared/continuations/test/dx/paq-hit-attribute-size.ll rename to llvmraytracing/test/dx/paq-hit-attribute-size.ll index 20003fd520..9b30438a33 100644 --- a/shared/continuations/test/dx/paq-hit-attribute-size.ll +++ b/llvmraytracing/test/dx/paq-hit-attribute-size.ll @@ -9,7 +9,7 @@ ; INVALID: Hit attributes are too large! 
-target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" +target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" %dx.types.Handle = type { i8* } %struct.MyPayload = type { float, i32, double } diff --git a/shared/continuations/test/dx/payload-caller-in-paq.ll b/llvmraytracing/test/dx/payload-caller-in-paq.ll similarity index 97% rename from shared/continuations/test/dx/payload-caller-in-paq.ll rename to llvmraytracing/test/dx/payload-caller-in-paq.ll index cde834e0d2..3561189a03 100644 --- a/shared/continuations/test/dx/payload-caller-in-paq.ll +++ b/llvmraytracing/test/dx/payload-caller-in-paq.ll @@ -11,7 +11,7 @@ ; function name regex. ; Note that the payload has nontrivial payload access qualifiers set. 
-target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" +target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" %dx.types.Handle = type { i8* } %struct.MyPayload = type { float, i32, double } @@ -31,7 +31,7 @@ target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-i1:32-i8:8-i16:32-i32:32- ; Function Attrs: nounwind define void @RayGen() #0 { ; LOWERRAYTRACINGPIPELINE-LABEL: define void @RayGen( -; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] !lgc.rt.shaderstage [[META24:![0-9]+]] !continuation.entry [[META14:![0-9]+]] !continuation.registercount [[META24]] !continuation [[META27:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] !lgc.rt.shaderstage [[META24:![0-9]+]] !continuation.entry [[META14:![0-9]+]] !continuation.registercount [[META24]] !continuation [[META28:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) @@ -41,7 +41,7 @@ define void @RayGen() #0 { ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = bitcast ptr [[TMP4]] to ptr ; LOWERRAYTRACINGPIPELINE-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[TMP5]]) #[[ATTR0]] ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_MYPAYLOAD]], ptr [[TMP4]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: store float 1.000000e+00, ptr [[TMP6]], align 8, 
!tbaa [[TBAA28:![0-9]+]] +; LOWERRAYTRACINGPIPELINE-NEXT: store float 1.000000e+00, ptr [[TMP6]], align 8, !tbaa [[TBAA29:![0-9]+]] ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP2]]) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP7]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP8]]) @@ -51,7 +51,7 @@ define void @RayGen() #0 { ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_MYPAYLOAD]], ptr [[TMP4]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = load float, ptr [[TMP10]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP11]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = call ptr inttoptr (i64 4 to ptr)([[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]]), !continuation.registercount [[META32:![0-9]+]], !continuation.returnedRegistercount !33 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = call ptr inttoptr (i64 4 to ptr)([[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]]), !continuation.registercount [[META33:![0-9]+]], !continuation.returnedRegistercount !26 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] @await.struct.DispatchSystemData(ptr [[TMP12]]) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_MYPAYLOAD]] poison, ptr [[TMP4]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_MYPAYLOAD]], ptr [[TMP4]], i32 0, i32 0 @@ -72,7 +72,7 @@ define void @RayGen() #0 { ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; LOWERRAYTRACINGPIPELINE-NEXT: br label [[DOTSPLIT:%.*]] ; LOWERRAYTRACINGPIPELINE: .split: -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = load float, ptr [[TMP6]], align 8, !tbaa [[TBAA28]] +; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = load float, ptr [[TMP6]], align 8, !tbaa [[TBAA29]] ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_MYPAYLOAD]], ptr [[TMP4]], i32 0, i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4, !tbaa [[TBAA34:![0-9]+]] ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = sitofp i32 [[TMP26]] to float diff --git a/shared/continuations/test/dx/payload-caller-in-paq.ll.hlsl b/llvmraytracing/test/dx/payload-caller-in-paq.ll.hlsl similarity index 100% rename from shared/continuations/test/dx/payload-caller-in-paq.ll.hlsl rename to llvmraytracing/test/dx/payload-caller-in-paq.ll.hlsl diff --git a/shared/continuations/test/dx/payload-save-registers.ll b/llvmraytracing/test/dx/payload-save-registers.ll similarity index 99% rename from shared/continuations/test/dx/payload-save-registers.ll rename to llvmraytracing/test/dx/payload-save-registers.ll index da2c40b5a0..37628101c3 100644 --- a/shared/continuations/test/dx/payload-save-registers.ll +++ b/llvmraytracing/test/dx/payload-save-registers.ll @@ -11,7 +11,7 @@ ; Also, function name mangling was removed. ; Note that the payload has payload access qualifiers set. 
-target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" +target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" %dx.types.Handle = type { i8* } %struct.OuterPayload = type { [15 x float], [15 x float] } diff --git a/shared/continuations/test/dx/payload-save-registers.ll.hlsl b/llvmraytracing/test/dx/payload-save-registers.ll.hlsl similarity index 100% rename from shared/continuations/test/dx/payload-save-registers.ll.hlsl rename to llvmraytracing/test/dx/payload-save-registers.ll.hlsl diff --git a/shared/continuations/test/dx/payload.ll b/llvmraytracing/test/dx/payload.ll similarity index 53% rename from shared/continuations/test/dx/payload.ll rename to llvmraytracing/test/dx/payload.ll index 334310a35d..9d38132eb3 100644 --- a/shared/continuations/test/dx/payload.ll +++ b/llvmraytracing/test/dx/payload.ll @@ -1,14 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 3 ; RUN: grep -v SKIP_GLOBAL_ADDRSPACE %s | opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,remove-types-metadata' -S 2> %t0.stderr | FileCheck -check-prefix=CLEANUP %s ; RUN: count 0 < %t0.stderr -; RUN: grep -v SKIP_GLOBAL_ADDRSPACE %s | opt --verify-each 
-passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,save-continuation-state,lint,dxil-cont-post-process,lint,remove-types-metadata' \ +; RUN: grep -v SKIP_GLOBAL_ADDRSPACE %s | opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' \ ; RUN: -S 2> %t1.stderr | FileCheck -check-prefix=POST-PROCESS %s ; RUN: count 0 < %t1.stderr -; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,save-continuation-state,lint,dxil-cont-post-process,lint,remove-types-metadata' \ +; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' \ ; RUN: -S %s 2> %t2.stderr | FileCheck -check-prefix=POST-PROCESS-GLOBAL %s ; RUN: count 0 < %t2.stderr -target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" +target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" %dx.types.Handle = type { i8* } %struct.DispatchSystemData = type { <3 
x i32> } @@ -198,24 +198,20 @@ attributes #3 = { nounwind } ; CLEANUP-LABEL: define void @main( ; CLEANUP-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META9:![0-9]+]] !continuation.entry [[META20:![0-9]+]] !continuation.registercount [[META9]] !continuation [[META21:![0-9]+]] !continuation.stacksize [[META22:![0-9]+]] !continuation.state [[META9]] { ; CLEANUP-NEXT: AllocaSpillBB: +; CLEANUP-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) +; CLEANUP-NEXT: [[PAYLOAD_SPILL_ALLOCA:%.*]] = getelementptr inbounds [[MAIN_FRAME:%.*]], ptr addrspace(32) [[TMP1]], i32 0, i32 0 ; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 -; CLEANUP-NEXT: [[TMP1:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CLEANUP-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -; CLEANUP-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 108 -; CLEANUP-NEXT: store i32 [[TMP3]], ptr [[TMP1]], align 4 ; CLEANUP-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) -; CLEANUP-NEXT: [[TMP4:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 -; CLEANUP-NEXT: [[TMP5:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 -; CLEANUP-NEXT: [[TMP6:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP4]]) -; CLEANUP-NEXT: [[TMP7:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP6]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) -; CLEANUP-NEXT: [[TMP8:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP7]]) +; CLEANUP-NEXT: [[TMP2:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 +; CLEANUP-NEXT: [[TMP3:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 +; CLEANUP-NEXT: [[TMP4:%.*]] = call 
[[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP2]]) +; CLEANUP-NEXT: [[TMP5:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP4]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; CLEANUP-NEXT: [[TMP6:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP5]]) ; CLEANUP-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT]], 0 ; CLEANUP-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]], 0 ; CLEANUP-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 -; CLEANUP-NEXT: [[TMP9:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CLEANUP-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -; CLEANUP-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], -108 -; CLEANUP-NEXT: store i32 [[TMP11]], ptr @PAYLOAD, align 4 +; CLEANUP-NEXT: [[TMP7:%.*]] = ptrtoint ptr addrspace(32) [[PAYLOAD_SPILL_ALLOCA]] to i32 +; CLEANUP-NEXT: store i32 [[TMP7]], ptr @PAYLOAD, align 4 ; CLEANUP-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) ; CLEANUP-NEXT: store i32 undef, ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 ; CLEANUP-NEXT: store i32 undef, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 @@ -267,15 +263,17 @@ attributes #3 = { nounwind } ; CLEANUP-NEXT: store i32 undef, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 54), align 4 ; CLEANUP-NEXT: store i32 undef, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 55), align 4 ; CLEANUP-NEXT: store i32 undef, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 56), align 4 -; CLEANUP-NEXT: [[TMP12:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CLEANUP-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -; CLEANUP-NEXT: call void (i64, ...) @continuation.continue(i64 4, i32 [[TMP13]], i64 ptrtoint (ptr @main.resume.0 to i64), [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]]), !continuation.registercount [[META18:![0-9]+]], !continuation.returnedRegistercount !18 +; CLEANUP-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 108) +; CLEANUP-NEXT: call void (i64, ...) 
@continuation.continue(i64 4, i64 ptrtoint (ptr @main.resume.0 to i64), [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]]), !continuation.registercount [[META18:![0-9]+]], !continuation.returnedRegistercount !18 ; CLEANUP-NEXT: unreachable ; ; ; CLEANUP-LABEL: define dso_local void @main.resume.0( -; CLEANUP-SAME: i32 [[TMP0:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP1:%.*]]) !lgc.rt.shaderstage [[META9]] !continuation.registercount [[META18]] !continuation [[META21]] { +; CLEANUP-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META9]] !continuation.registercount [[META18]] !continuation [[META21]] { ; CLEANUP-NEXT: entryresume.0: +; CLEANUP-NEXT: call void @lgc.cps.free(i32 108) +; CLEANUP-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) +; CLEANUP-NEXT: [[PAYLOAD_SPILL_ALLOCA:%.*]] = getelementptr inbounds [[MAIN_FRAME:%.*]], ptr addrspace(32) [[TMP1]], i32 0, i32 0 ; CLEANUP-NEXT: [[TMP2:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 ; CLEANUP-NEXT: [[TMP3:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 ; CLEANUP-NEXT: [[TMP4:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 @@ -326,18 +324,15 @@ attributes #3 = { nounwind } ; CLEANUP-NEXT: [[TMP49:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 54), align 4 ; CLEANUP-NEXT: [[TMP50:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 55), align 4 ; CLEANUP-NEXT: [[TMP51:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 56), align 4 -; 
CLEANUP-NEXT: [[DOTFCA_0_EXTRACT1:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP1]], 0 +; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT1:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 ; CLEANUP-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) -; CLEANUP-NEXT: [[TMP52:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CLEANUP-NEXT: [[TMP53:%.*]] = load i32, ptr [[TMP52]], align 4 -; CLEANUP-NEXT: [[TMP54:%.*]] = add i32 [[TMP53]], -108 -; CLEANUP-NEXT: store i32 [[TMP54]], ptr [[TMP52]], align 4 -; CLEANUP-NEXT: call void @continuation.complete() +; CLEANUP-NEXT: ret void +; CLEANUP: entryresume.0.split: ; CLEANUP-NEXT: unreachable ; ; ; CLEANUP-LABEL: define void @AnyHit( -; CLEANUP-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] !lgc.rt.shaderstage [[META23:![0-9]+]] !continuation.registercount [[META18]] !continuation [[META24:![0-9]+]] !continuation.state [[META9]] { +; CLEANUP-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] !lgc.rt.shaderstage [[META23:![0-9]+]] !continuation.registercount [[META18]] !continuation [[META24:![0-9]+]] !continuation.state [[META9]] { ; CLEANUP-NEXT: AllocaSpillBB: ; CLEANUP-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITTRAVERSALDATA]], align 8 ; CLEANUP-NEXT: [[DOTFCA_0_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 0, 0, 0 @@ -527,288 +522,262 @@ attributes #3 = { nounwind } ; CLEANUP-NEXT: [[DOTFCA_1_1_GEP10:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 ; CLEANUP-NEXT: [[DOTFCA_1_1_LOAD:%.*]] = load i32, ptr [[DOTFCA_1_1_GEP10]], align 4 ; CLEANUP-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], i32 
[[DOTFCA_1_1_LOAD]], 1, 1 -; CLEANUP-NEXT: [[TMP61:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CLEANUP-NEXT: [[TMP62:%.*]] = load i32, ptr [[TMP61]], align 4 -; CLEANUP-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP62]], [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]]), !continuation.registercount [[META18]] +; CLEANUP-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]]), !continuation.registercount [[META18]] ; CLEANUP-NEXT: unreachable ; ; ; CLEANUP-LABEL: define void @ClosestHit( -; CLEANUP-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META25:![0-9]+]] !continuation.registercount [[META18]] !continuation [[META26:![0-9]+]] !continuation.stacksize [[META27:![0-9]+]] !continuation.state [[META28:![0-9]+]] { +; CLEANUP-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META25:![0-9]+]] !continuation.registercount [[META18]] !continuation [[META26:![0-9]+]] !continuation.stacksize [[META27:![0-9]+]] !continuation.state [[META28:![0-9]+]] { ; CLEANUP-NEXT: AllocaSpillBB: -; CLEANUP-NEXT: [[TMP1:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CLEANUP-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -; CLEANUP-NEXT: [[TMP3:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(21) -; CLEANUP-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP3]], i64 108 -; CLEANUP-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[CLOSESTHIT_FRAME:%.*]], ptr addrspace(21) [[TMP4]], i32 0, i32 0 -; CLEANUP-NEXT: store i64 [[RETURNADDR]], ptr addrspace(21) [[RETURNADDR_SPILL_ADDR]], align 4 +; CLEANUP-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) +; CLEANUP-NEXT: [[PAYLOAD_SPILL_ALLOCA:%.*]] = getelementptr inbounds [[CLOSESTHIT_FRAME:%.*]], ptr addrspace(32) [[TMP1]], i32 0, i32 0 
+; CLEANUP-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[CLOSESTHIT_FRAME]], ptr addrspace(32) [[TMP1]], i32 0, i32 1 +; CLEANUP-NEXT: store i64 [[RETURNADDR]], ptr addrspace(32) [[RETURNADDR_SPILL_ADDR]], align 4 ; CLEANUP-NEXT: [[DOTFCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 0, 0 ; CLEANUP-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 1, 0 -; CLEANUP-NEXT: [[TMP5:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CLEANUP-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -; CLEANUP-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], 108 -; CLEANUP-NEXT: store i32 [[TMP7]], ptr [[TMP5]], align 4 -; CLEANUP-NEXT: [[TMP8:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; CLEANUP-NEXT: [[TMP9:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; CLEANUP-NEXT: [[TMP10:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; CLEANUP-NEXT: [[TMP11:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 10), align 4 -; CLEANUP-NEXT: [[TMP12:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 11), align 4 -; CLEANUP-NEXT: [[TMP13:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 12), align 4 -; CLEANUP-NEXT: [[TMP14:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 
13), align 4 -; CLEANUP-NEXT: [[TMP15:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 14), align 4 -; CLEANUP-NEXT: [[TMP16:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 15), align 4 -; CLEANUP-NEXT: [[TMP17:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 16), align 4 -; CLEANUP-NEXT: [[TMP18:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 17), align 4 -; CLEANUP-NEXT: [[TMP19:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 18), align 4 -; CLEANUP-NEXT: [[TMP20:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 19), align 4 -; CLEANUP-NEXT: [[TMP21:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 20), align 4 -; CLEANUP-NEXT: [[TMP22:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 21), align 4 -; CLEANUP-NEXT: [[TMP23:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 22), align 4 -; CLEANUP-NEXT: [[TMP24:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 23), align 4 -; CLEANUP-NEXT: [[TMP25:%.*]] = load i32, ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 24), align 4 -; CLEANUP-NEXT: [[TMP26:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 25), align 4 -; CLEANUP-NEXT: [[TMP27:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 26), align 4 -; CLEANUP-NEXT: [[TMP28:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 27), align 4 -; CLEANUP-NEXT: [[TMP29:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 28), align 4 -; CLEANUP-NEXT: [[TMP30:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 29), align 4 -; CLEANUP-NEXT: [[TMP31:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 30), align 4 -; CLEANUP-NEXT: [[TMP32:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 31), align 4 -; CLEANUP-NEXT: [[TMP33:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 32), align 4 -; CLEANUP-NEXT: [[TMP34:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 33), align 4 -; CLEANUP-NEXT: [[TMP35:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 
0, i64 34), align 4 -; CLEANUP-NEXT: [[TMP36:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 35), align 4 -; CLEANUP-NEXT: [[TMP37:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 36), align 4 -; CLEANUP-NEXT: [[TMP38:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 37), align 4 -; CLEANUP-NEXT: [[TMP39:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 38), align 4 -; CLEANUP-NEXT: [[TMP40:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 39), align 4 -; CLEANUP-NEXT: [[TMP41:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 40), align 4 -; CLEANUP-NEXT: [[TMP42:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 41), align 4 -; CLEANUP-NEXT: [[TMP43:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 42), align 4 -; CLEANUP-NEXT: [[TMP44:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 43), align 4 -; CLEANUP-NEXT: [[TMP45:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 44), align 4 -; CLEANUP-NEXT: [[TMP46:%.*]] = load i32, ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 45), align 4 -; CLEANUP-NEXT: [[TMP47:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 46), align 4 -; CLEANUP-NEXT: [[TMP48:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 47), align 4 -; CLEANUP-NEXT: [[TMP49:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 48), align 4 -; CLEANUP-NEXT: [[TMP50:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 49), align 4 -; CLEANUP-NEXT: [[TMP51:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 50), align 4 -; CLEANUP-NEXT: [[TMP52:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 51), align 4 -; CLEANUP-NEXT: [[TMP53:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 52), align 4 -; CLEANUP-NEXT: [[TMP54:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 53), align 4 -; CLEANUP-NEXT: [[TMP55:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 54), align 4 -; CLEANUP-NEXT: [[TMP56:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 
0, i64 55), align 4 -; CLEANUP-NEXT: [[TMP57:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 56), align 4 +; CLEANUP-NEXT: [[TMP2:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 +; CLEANUP-NEXT: [[TMP3:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 +; CLEANUP-NEXT: [[TMP4:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 +; CLEANUP-NEXT: [[TMP5:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 10), align 4 +; CLEANUP-NEXT: [[TMP6:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 11), align 4 +; CLEANUP-NEXT: [[TMP7:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 12), align 4 +; CLEANUP-NEXT: [[TMP8:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 13), align 4 +; CLEANUP-NEXT: [[TMP9:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 14), align 4 +; CLEANUP-NEXT: [[TMP10:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 15), align 4 +; CLEANUP-NEXT: [[TMP11:%.*]] = load i32, ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 16), align 4 +; CLEANUP-NEXT: [[TMP12:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 17), align 4 +; CLEANUP-NEXT: [[TMP13:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 18), align 4 +; CLEANUP-NEXT: [[TMP14:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 19), align 4 +; CLEANUP-NEXT: [[TMP15:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 20), align 4 +; CLEANUP-NEXT: [[TMP16:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 21), align 4 +; CLEANUP-NEXT: [[TMP17:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 22), align 4 +; CLEANUP-NEXT: [[TMP18:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 23), align 4 +; CLEANUP-NEXT: [[TMP19:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 24), align 4 +; CLEANUP-NEXT: [[TMP20:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 25), align 4 +; CLEANUP-NEXT: [[TMP21:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 
0, i64 26), align 4 +; CLEANUP-NEXT: [[TMP22:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 27), align 4 +; CLEANUP-NEXT: [[TMP23:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 28), align 4 +; CLEANUP-NEXT: [[TMP24:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 29), align 4 +; CLEANUP-NEXT: [[TMP25:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 30), align 4 +; CLEANUP-NEXT: [[TMP26:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 31), align 4 +; CLEANUP-NEXT: [[TMP27:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 32), align 4 +; CLEANUP-NEXT: [[TMP28:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 33), align 4 +; CLEANUP-NEXT: [[TMP29:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 34), align 4 +; CLEANUP-NEXT: [[TMP30:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 35), align 4 +; CLEANUP-NEXT: [[TMP31:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 36), align 4 +; CLEANUP-NEXT: [[TMP32:%.*]] = load i32, ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 37), align 4 +; CLEANUP-NEXT: [[TMP33:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 38), align 4 +; CLEANUP-NEXT: [[TMP34:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 39), align 4 +; CLEANUP-NEXT: [[TMP35:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 40), align 4 +; CLEANUP-NEXT: [[TMP36:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 41), align 4 +; CLEANUP-NEXT: [[TMP37:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 42), align 4 +; CLEANUP-NEXT: [[TMP38:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 43), align 4 +; CLEANUP-NEXT: [[TMP39:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 44), align 4 +; CLEANUP-NEXT: [[TMP40:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 45), align 4 +; CLEANUP-NEXT: [[TMP41:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 46), align 4 +; CLEANUP-NEXT: [[TMP42:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 
0, i64 47), align 4 +; CLEANUP-NEXT: [[TMP43:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 48), align 4 +; CLEANUP-NEXT: [[TMP44:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 49), align 4 +; CLEANUP-NEXT: [[TMP45:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 50), align 4 +; CLEANUP-NEXT: [[TMP46:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 51), align 4 +; CLEANUP-NEXT: [[TMP47:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 52), align 4 +; CLEANUP-NEXT: [[TMP48:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 53), align 4 +; CLEANUP-NEXT: [[TMP49:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 54), align 4 +; CLEANUP-NEXT: [[TMP50:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 55), align 4 +; CLEANUP-NEXT: [[TMP51:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 56), align 4 ; CLEANUP-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; CLEANUP-NEXT: [[TMP58:%.*]] = load i32, ptr @PAYLOAD, align 4 -; CLEANUP-NEXT: [[DOTSPILL_ADDR:%.*]] = getelementptr inbounds [[CLOSESTHIT_FRAME]], ptr addrspace(21) [[TMP4]], i32 0, i32 1 -; CLEANUP-NEXT: store i32 [[TMP58]], ptr addrspace(21) [[DOTSPILL_ADDR]], align 4 +; CLEANUP-NEXT: [[TMP52:%.*]] = load i32, ptr @PAYLOAD, align 4 +; CLEANUP-NEXT: [[DOTSPILL_ADDR:%.*]] = getelementptr inbounds [[CLOSESTHIT_FRAME]], ptr addrspace(32) [[TMP1]], i32 0, i32 2 +; CLEANUP-NEXT: store i32 [[TMP52]], ptr addrspace(32) [[DOTSPILL_ADDR]], align 4 ; CLEANUP-NEXT: [[VAL_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> [[DOTFCA_1_0_EXTRACT]], 0 ; CLEANUP-NEXT: [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL_I_FCA_0_INSERT]], 0 ; CLEANUP-NEXT: [[DOTSROA_053_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 0 -; CLEANUP-NEXT: [[TMP59:%.*]] = bitcast float [[DOTSROA_053_0_VEC_EXTRACT]] to i32 +; CLEANUP-NEXT: [[TMP53:%.*]] = bitcast float [[DOTSROA_053_0_VEC_EXTRACT]] to i32 ; CLEANUP-NEXT: [[DOTSROA_053_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 1 -; CLEANUP-NEXT: [[TMP60:%.*]] = bitcast float [[DOTSROA_053_4_VEC_EXTRACT]] to i32 +; CLEANUP-NEXT: [[TMP54:%.*]] = bitcast float [[DOTSROA_053_4_VEC_EXTRACT]] to i32 ; CLEANUP-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; CLEANUP-NEXT: [[TMP61:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 -; CLEANUP-NEXT: [[TMP62:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 -; CLEANUP-NEXT: [[TMP63:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP61]]) -; CLEANUP-NEXT: [[TMP64:%.*]] = call [[DX_TYPES_HANDLE]] 
@dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP63]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) -; CLEANUP-NEXT: [[TMP65:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP64]]) +; CLEANUP-NEXT: [[TMP55:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 +; CLEANUP-NEXT: [[TMP56:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 +; CLEANUP-NEXT: [[TMP57:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP55]]) +; CLEANUP-NEXT: [[TMP58:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP57]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; CLEANUP-NEXT: [[TMP59:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP58]]) ; CLEANUP-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison, <3 x i32> [[DOTFCA_0_0_EXTRACT]], 0 ; CLEANUP-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]], 0 ; CLEANUP-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 -; CLEANUP-NEXT: [[TMP66:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CLEANUP-NEXT: [[TMP67:%.*]] = load i32, ptr [[TMP66]], align 4 -; CLEANUP-NEXT: [[TMP68:%.*]] = add i32 [[TMP67]], -108 -; CLEANUP-NEXT: store i32 [[TMP68]], ptr @PAYLOAD, align 4 +; CLEANUP-NEXT: [[TMP60:%.*]] = ptrtoint ptr addrspace(32) [[PAYLOAD_SPILL_ALLOCA]] to i32 +; CLEANUP-NEXT: store i32 [[TMP60]], ptr @PAYLOAD, align 4 ; CLEANUP-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; CLEANUP-NEXT: store i32 [[TMP8]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; CLEANUP-NEXT: store i32 [[TMP9]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; CLEANUP-NEXT: store i32 [[TMP10]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; CLEANUP-NEXT: store i32 [[TMP11]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 10), align 4 -; CLEANUP-NEXT: store i32 [[TMP12]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 11), align 4 -; CLEANUP-NEXT: store i32 [[TMP13]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 12), align 4 -; CLEANUP-NEXT: store i32 [[TMP14]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 13), align 4 -; CLEANUP-NEXT: store i32 [[TMP15]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 14), align 4 -; CLEANUP-NEXT: store i32 [[TMP16]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 15), align 4 -; CLEANUP-NEXT: store i32 [[TMP17]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 16), align 4 -; CLEANUP-NEXT: store i32 [[TMP18]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 17), align 4 -; CLEANUP-NEXT: store i32 [[TMP19]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 18), align 4 -; 
CLEANUP-NEXT: store i32 [[TMP20]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 19), align 4 -; CLEANUP-NEXT: store i32 [[TMP21]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 20), align 4 -; CLEANUP-NEXT: store i32 [[TMP22]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 21), align 4 -; CLEANUP-NEXT: store i32 [[TMP23]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 22), align 4 -; CLEANUP-NEXT: store i32 [[TMP24]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 23), align 4 -; CLEANUP-NEXT: store i32 [[TMP25]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 24), align 4 -; CLEANUP-NEXT: store i32 [[TMP26]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 25), align 4 -; CLEANUP-NEXT: store i32 [[TMP27]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 26), align 4 -; CLEANUP-NEXT: store i32 [[TMP28]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 27), align 4 -; CLEANUP-NEXT: store i32 [[TMP29]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 28), align 4 -; CLEANUP-NEXT: store i32 [[TMP30]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 29), align 4 -; CLEANUP-NEXT: store i32 [[TMP31]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 30), align 4 -; CLEANUP-NEXT: store i32 [[TMP32]], ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 31), align 4 -; CLEANUP-NEXT: store i32 [[TMP33]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 32), align 4 -; CLEANUP-NEXT: store i32 [[TMP34]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 33), align 4 -; CLEANUP-NEXT: store i32 [[TMP35]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 34), align 4 -; CLEANUP-NEXT: store i32 [[TMP36]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 35), align 4 -; CLEANUP-NEXT: store i32 [[TMP37]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 36), align 4 -; CLEANUP-NEXT: store i32 [[TMP38]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 37), align 4 -; CLEANUP-NEXT: store i32 [[TMP39]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 38), align 4 -; CLEANUP-NEXT: store i32 [[TMP40]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 39), align 4 -; CLEANUP-NEXT: store i32 [[TMP41]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 40), align 4 -; CLEANUP-NEXT: store i32 [[TMP42]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 41), align 4 -; CLEANUP-NEXT: store i32 [[TMP43]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 42), align 4 -; CLEANUP-NEXT: store i32 [[TMP44]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr 
@PAYLOAD, i32 0, i32 0, i64 43), align 4 -; CLEANUP-NEXT: store i32 [[TMP45]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 44), align 4 -; CLEANUP-NEXT: store i32 [[TMP46]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 45), align 4 -; CLEANUP-NEXT: store i32 [[TMP47]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 46), align 4 -; CLEANUP-NEXT: store i32 [[TMP48]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 47), align 4 -; CLEANUP-NEXT: store i32 [[TMP49]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 48), align 4 -; CLEANUP-NEXT: store i32 [[TMP50]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 49), align 4 -; CLEANUP-NEXT: store i32 [[TMP51]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 50), align 4 -; CLEANUP-NEXT: store i32 [[TMP52]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 51), align 4 -; CLEANUP-NEXT: store i32 [[TMP53]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 52), align 4 -; CLEANUP-NEXT: store i32 [[TMP54]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 53), align 4 -; CLEANUP-NEXT: store i32 [[TMP55]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 54), align 4 -; CLEANUP-NEXT: store i32 [[TMP56]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 55), align 4 -; CLEANUP-NEXT: store i32 
[[TMP57]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 56), align 4 -; CLEANUP-NEXT: [[TMP69:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CLEANUP-NEXT: [[TMP70:%.*]] = load i32, ptr [[TMP69]], align 4 -; CLEANUP-NEXT: [[TMP71:%.*]] = add i32 [[TMP70]], 12 -; CLEANUP-NEXT: store i32 [[TMP71]], ptr [[TMP69]], align 4 -; CLEANUP-NEXT: [[TMP72:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CLEANUP-NEXT: [[TMP73:%.*]] = load i32, ptr [[TMP72]], align 4 -; CLEANUP-NEXT: call void (i64, ...) @continuation.continue(i64 4, i32 [[TMP73]], i64 ptrtoint (ptr @ClosestHit.resume.0 to i64), [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]]), !continuation.registercount [[META18]], !continuation.returnedRegistercount !18 +; CLEANUP-NEXT: store i32 [[TMP2]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 +; CLEANUP-NEXT: store i32 [[TMP3]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 +; CLEANUP-NEXT: store i32 [[TMP4]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 +; CLEANUP-NEXT: store i32 [[TMP5]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 10), align 4 +; CLEANUP-NEXT: store i32 [[TMP6]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 11), align 4 +; CLEANUP-NEXT: store i32 [[TMP7]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 12), align 4 +; CLEANUP-NEXT: store i32 [[TMP8]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 13), align 4 +; CLEANUP-NEXT: store i32 [[TMP9]], ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 14), align 4 +; CLEANUP-NEXT: store i32 [[TMP10]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 15), align 4 +; CLEANUP-NEXT: store i32 [[TMP11]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 16), align 4 +; CLEANUP-NEXT: store i32 [[TMP12]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 17), align 4 +; CLEANUP-NEXT: store i32 [[TMP13]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 18), align 4 +; CLEANUP-NEXT: store i32 [[TMP14]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 19), align 4 +; CLEANUP-NEXT: store i32 [[TMP15]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 20), align 4 +; CLEANUP-NEXT: store i32 [[TMP16]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 21), align 4 +; CLEANUP-NEXT: store i32 [[TMP17]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 22), align 4 +; CLEANUP-NEXT: store i32 [[TMP18]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 23), align 4 +; CLEANUP-NEXT: store i32 [[TMP19]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 24), align 4 +; CLEANUP-NEXT: store i32 [[TMP20]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 25), align 4 +; CLEANUP-NEXT: store i32 [[TMP21]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr 
@PAYLOAD, i32 0, i32 0, i64 26), align 4 +; CLEANUP-NEXT: store i32 [[TMP22]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 27), align 4 +; CLEANUP-NEXT: store i32 [[TMP23]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 28), align 4 +; CLEANUP-NEXT: store i32 [[TMP24]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 29), align 4 +; CLEANUP-NEXT: store i32 [[TMP25]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 30), align 4 +; CLEANUP-NEXT: store i32 [[TMP26]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 31), align 4 +; CLEANUP-NEXT: store i32 [[TMP27]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 32), align 4 +; CLEANUP-NEXT: store i32 [[TMP28]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 33), align 4 +; CLEANUP-NEXT: store i32 [[TMP29]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 34), align 4 +; CLEANUP-NEXT: store i32 [[TMP30]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 35), align 4 +; CLEANUP-NEXT: store i32 [[TMP31]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 36), align 4 +; CLEANUP-NEXT: store i32 [[TMP32]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 37), align 4 +; CLEANUP-NEXT: store i32 [[TMP33]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 38), align 4 +; CLEANUP-NEXT: store i32 
[[TMP34]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 39), align 4 +; CLEANUP-NEXT: store i32 [[TMP35]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 40), align 4 +; CLEANUP-NEXT: store i32 [[TMP36]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 41), align 4 +; CLEANUP-NEXT: store i32 [[TMP37]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 42), align 4 +; CLEANUP-NEXT: store i32 [[TMP38]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 43), align 4 +; CLEANUP-NEXT: store i32 [[TMP39]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 44), align 4 +; CLEANUP-NEXT: store i32 [[TMP40]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 45), align 4 +; CLEANUP-NEXT: store i32 [[TMP41]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 46), align 4 +; CLEANUP-NEXT: store i32 [[TMP42]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 47), align 4 +; CLEANUP-NEXT: store i32 [[TMP43]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 48), align 4 +; CLEANUP-NEXT: store i32 [[TMP44]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 49), align 4 +; CLEANUP-NEXT: store i32 [[TMP45]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 50), align 4 +; CLEANUP-NEXT: store i32 [[TMP46]], ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 51), align 4 +; CLEANUP-NEXT: store i32 [[TMP47]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 52), align 4 +; CLEANUP-NEXT: store i32 [[TMP48]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 53), align 4 +; CLEANUP-NEXT: store i32 [[TMP49]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 54), align 4 +; CLEANUP-NEXT: store i32 [[TMP50]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 55), align 4 +; CLEANUP-NEXT: store i32 [[TMP51]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 56), align 4 +; CLEANUP-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 120) +; CLEANUP-NEXT: call void (i64, ...) 
@continuation.continue(i64 4, i64 ptrtoint (ptr @ClosestHit.resume.0 to i64), [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]]), !continuation.registercount [[META18]], !continuation.returnedRegistercount !18 ; CLEANUP-NEXT: unreachable ; ; ; CLEANUP-LABEL: define dso_local void @ClosestHit.resume.0( -; CLEANUP-SAME: i32 [[TMP0:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP1:%.*]]) !lgc.rt.shaderstage [[META25]] !continuation.registercount [[META18]] !continuation [[META26]] { +; CLEANUP-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META25]] !continuation.registercount [[META18]] !continuation [[META26]] { ; CLEANUP-NEXT: entryresume.0: -; CLEANUP-NEXT: [[TMP2:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CLEANUP-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -; CLEANUP-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], -12 -; CLEANUP-NEXT: store i32 [[TMP4]], ptr [[TMP2]], align 4 -; CLEANUP-NEXT: [[TMP5:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CLEANUP-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -; CLEANUP-NEXT: [[TMP7:%.*]] = inttoptr i32 [[TMP6]] to ptr addrspace(21) -; CLEANUP-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP7]], i64 0 -; CLEANUP-NEXT: [[TMP9:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; CLEANUP-NEXT: [[TMP10:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; CLEANUP-NEXT: [[TMP11:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; CLEANUP-NEXT: [[TMP12:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 10), align 4 -; CLEANUP-NEXT: [[TMP13:%.*]] = load i32, ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 11), align 4 -; CLEANUP-NEXT: [[TMP14:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 12), align 4 -; CLEANUP-NEXT: [[TMP15:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 13), align 4 -; CLEANUP-NEXT: [[TMP16:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 14), align 4 -; CLEANUP-NEXT: [[TMP17:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 15), align 4 -; CLEANUP-NEXT: [[TMP18:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 16), align 4 -; CLEANUP-NEXT: [[TMP19:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 17), align 4 -; CLEANUP-NEXT: [[TMP20:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 18), align 4 -; CLEANUP-NEXT: [[TMP21:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 19), align 4 -; CLEANUP-NEXT: [[TMP22:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 20), align 4 -; CLEANUP-NEXT: [[TMP23:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 21), align 4 -; CLEANUP-NEXT: [[TMP24:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 22), align 4 -; CLEANUP-NEXT: 
[[TMP25:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 23), align 4 -; CLEANUP-NEXT: [[TMP26:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 24), align 4 -; CLEANUP-NEXT: [[TMP27:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 25), align 4 -; CLEANUP-NEXT: [[TMP28:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 26), align 4 -; CLEANUP-NEXT: [[TMP29:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 27), align 4 -; CLEANUP-NEXT: [[TMP30:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 28), align 4 -; CLEANUP-NEXT: [[TMP31:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 29), align 4 -; CLEANUP-NEXT: [[TMP32:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 30), align 4 -; CLEANUP-NEXT: [[TMP33:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 31), align 4 -; CLEANUP-NEXT: [[TMP34:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 32), align 4 -; CLEANUP-NEXT: [[TMP35:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 33), align 4 -; CLEANUP-NEXT: [[TMP36:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, 
i32 0, i64 34), align 4 -; CLEANUP-NEXT: [[TMP37:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 35), align 4 -; CLEANUP-NEXT: [[TMP38:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 36), align 4 -; CLEANUP-NEXT: [[TMP39:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 37), align 4 -; CLEANUP-NEXT: [[TMP40:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 38), align 4 -; CLEANUP-NEXT: [[TMP41:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 39), align 4 -; CLEANUP-NEXT: [[TMP42:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 40), align 4 -; CLEANUP-NEXT: [[TMP43:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 41), align 4 -; CLEANUP-NEXT: [[TMP44:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 42), align 4 -; CLEANUP-NEXT: [[TMP45:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 43), align 4 -; CLEANUP-NEXT: [[TMP46:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 44), align 4 -; CLEANUP-NEXT: [[TMP47:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 45), align 4 -; CLEANUP-NEXT: [[TMP48:%.*]] = load i32, ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 46), align 4 -; CLEANUP-NEXT: [[TMP49:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 47), align 4 -; CLEANUP-NEXT: [[TMP50:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 48), align 4 -; CLEANUP-NEXT: [[TMP51:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 49), align 4 -; CLEANUP-NEXT: [[TMP52:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 50), align 4 -; CLEANUP-NEXT: [[TMP53:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 51), align 4 -; CLEANUP-NEXT: [[TMP54:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 52), align 4 -; CLEANUP-NEXT: [[TMP55:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 53), align 4 -; CLEANUP-NEXT: [[TMP56:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 54), align 4 -; CLEANUP-NEXT: [[TMP57:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 55), align 4 -; CLEANUP-NEXT: [[TMP58:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 56), align 4 -; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP1]], 0 +; CLEANUP-NEXT: call void @lgc.cps.free(i32 120) +; CLEANUP-NEXT: [[TMP1:%.*]] = call ptr 
addrspace(32) @lgc.cps.peek(i32 0) +; CLEANUP-NEXT: [[PAYLOAD_SPILL_ALLOCA:%.*]] = getelementptr inbounds [[CLOSESTHIT_FRAME:%.*]], ptr addrspace(32) [[TMP1]], i32 0, i32 0 +; CLEANUP-NEXT: [[TMP2:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 +; CLEANUP-NEXT: [[TMP3:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 +; CLEANUP-NEXT: [[TMP4:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 +; CLEANUP-NEXT: [[TMP5:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 10), align 4 +; CLEANUP-NEXT: [[TMP6:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 11), align 4 +; CLEANUP-NEXT: [[TMP7:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 12), align 4 +; CLEANUP-NEXT: [[TMP8:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 13), align 4 +; CLEANUP-NEXT: [[TMP9:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 14), align 4 +; CLEANUP-NEXT: [[TMP10:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 15), align 4 +; CLEANUP-NEXT: [[TMP11:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 16), align 4 +; CLEANUP-NEXT: [[TMP12:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], 
ptr @PAYLOAD, i32 0, i32 0, i64 17), align 4 +; CLEANUP-NEXT: [[TMP13:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 18), align 4 +; CLEANUP-NEXT: [[TMP14:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 19), align 4 +; CLEANUP-NEXT: [[TMP15:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 20), align 4 +; CLEANUP-NEXT: [[TMP16:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 21), align 4 +; CLEANUP-NEXT: [[TMP17:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 22), align 4 +; CLEANUP-NEXT: [[TMP18:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 23), align 4 +; CLEANUP-NEXT: [[TMP19:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 24), align 4 +; CLEANUP-NEXT: [[TMP20:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 25), align 4 +; CLEANUP-NEXT: [[TMP21:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 26), align 4 +; CLEANUP-NEXT: [[TMP22:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 27), align 4 +; CLEANUP-NEXT: [[TMP23:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 28), align 4 +; CLEANUP-NEXT: [[TMP24:%.*]] = load i32, ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 29), align 4 +; CLEANUP-NEXT: [[TMP25:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 30), align 4 +; CLEANUP-NEXT: [[TMP26:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 31), align 4 +; CLEANUP-NEXT: [[TMP27:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 32), align 4 +; CLEANUP-NEXT: [[TMP28:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 33), align 4 +; CLEANUP-NEXT: [[TMP29:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 34), align 4 +; CLEANUP-NEXT: [[TMP30:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 35), align 4 +; CLEANUP-NEXT: [[TMP31:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 36), align 4 +; CLEANUP-NEXT: [[TMP32:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 37), align 4 +; CLEANUP-NEXT: [[TMP33:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 38), align 4 +; CLEANUP-NEXT: [[TMP34:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 39), align 4 +; CLEANUP-NEXT: [[TMP35:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 40), align 4 +; CLEANUP-NEXT: 
[[TMP36:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 41), align 4 +; CLEANUP-NEXT: [[TMP37:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 42), align 4 +; CLEANUP-NEXT: [[TMP38:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 43), align 4 +; CLEANUP-NEXT: [[TMP39:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 44), align 4 +; CLEANUP-NEXT: [[TMP40:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 45), align 4 +; CLEANUP-NEXT: [[TMP41:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 46), align 4 +; CLEANUP-NEXT: [[TMP42:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 47), align 4 +; CLEANUP-NEXT: [[TMP43:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 48), align 4 +; CLEANUP-NEXT: [[TMP44:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 49), align 4 +; CLEANUP-NEXT: [[TMP45:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 50), align 4 +; CLEANUP-NEXT: [[TMP46:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 51), align 4 +; CLEANUP-NEXT: [[TMP47:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, 
i32 0, i64 52), align 4 +; CLEANUP-NEXT: [[TMP48:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 53), align 4 +; CLEANUP-NEXT: [[TMP49:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 54), align 4 +; CLEANUP-NEXT: [[TMP50:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 55), align 4 +; CLEANUP-NEXT: [[TMP51:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 56), align 4 +; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 ; CLEANUP-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; CLEANUP-NEXT: [[DOTRELOAD_ADDR:%.*]] = getelementptr inbounds [[CLOSESTHIT_FRAME:%.*]], ptr addrspace(21) [[TMP8]], i32 0, i32 1 -; CLEANUP-NEXT: [[DOTRELOAD:%.*]] = load i32, ptr addrspace(21) [[DOTRELOAD_ADDR]], align 4 -; CLEANUP-NEXT: [[RETURNADDR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[CLOSESTHIT_FRAME]], ptr addrspace(21) [[TMP8]], i32 0, i32 0 -; CLEANUP-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(21) [[RETURNADDR_RELOAD_ADDR]], align 4 +; CLEANUP-NEXT: [[DOTRELOAD_ADDR:%.*]] = getelementptr inbounds [[CLOSESTHIT_FRAME]], ptr addrspace(32) [[TMP1]], i32 0, i32 2 +; CLEANUP-NEXT: [[DOTRELOAD:%.*]] = load i32, ptr addrspace(32) [[DOTRELOAD_ADDR]], align 4 +; CLEANUP-NEXT: [[RETURNADDR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[CLOSESTHIT_FRAME]], ptr addrspace(32) [[TMP1]], i32 0, i32 1 +; CLEANUP-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(32) [[RETURNADDR_RELOAD_ADDR]], align 4 ; CLEANUP-NEXT: store i32 [[DOTRELOAD]], ptr @PAYLOAD, align 4 ; CLEANUP-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; CLEANUP-NEXT: store i32 [[TMP9]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; CLEANUP-NEXT: store i32 [[TMP10]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; CLEANUP-NEXT: store i32 [[TMP11]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; CLEANUP-NEXT: store i32 [[TMP12]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 10), align 4 -; CLEANUP-NEXT: store i32 [[TMP13]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 11), align 4 -; CLEANUP-NEXT: store i32 [[TMP14]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 12), align 4 -; CLEANUP-NEXT: store i32 [[TMP15]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 13), align 4 -; CLEANUP-NEXT: store i32 [[TMP16]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 14), align 4 -; CLEANUP-NEXT: store i32 [[TMP17]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 15), align 4 -; CLEANUP-NEXT: store i32 [[TMP18]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 16), align 4 -; CLEANUP-NEXT: store i32 [[TMP19]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 17), align 4 -; CLEANUP-NEXT: store i32 [[TMP20]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr 
@PAYLOAD, i32 0, i32 0, i64 18), align 4 -; CLEANUP-NEXT: store i32 [[TMP21]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 19), align 4 -; CLEANUP-NEXT: store i32 [[TMP22]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 20), align 4 -; CLEANUP-NEXT: store i32 [[TMP23]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 21), align 4 -; CLEANUP-NEXT: store i32 [[TMP24]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 22), align 4 -; CLEANUP-NEXT: store i32 [[TMP25]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 23), align 4 -; CLEANUP-NEXT: store i32 [[TMP26]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 24), align 4 -; CLEANUP-NEXT: store i32 [[TMP27]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 25), align 4 -; CLEANUP-NEXT: store i32 [[TMP28]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 26), align 4 -; CLEANUP-NEXT: store i32 [[TMP29]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 27), align 4 -; CLEANUP-NEXT: store i32 [[TMP30]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 28), align 4 -; CLEANUP-NEXT: store i32 [[TMP31]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 29), align 4 -; CLEANUP-NEXT: store i32 [[TMP32]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, 
i32 0, i64 30), align 4 -; CLEANUP-NEXT: store i32 [[TMP33]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 31), align 4 -; CLEANUP-NEXT: store i32 [[TMP34]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 32), align 4 -; CLEANUP-NEXT: store i32 [[TMP35]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 33), align 4 -; CLEANUP-NEXT: store i32 [[TMP36]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 34), align 4 -; CLEANUP-NEXT: store i32 [[TMP37]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 35), align 4 -; CLEANUP-NEXT: store i32 [[TMP38]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 36), align 4 -; CLEANUP-NEXT: store i32 [[TMP39]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 37), align 4 -; CLEANUP-NEXT: store i32 [[TMP40]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 38), align 4 -; CLEANUP-NEXT: store i32 [[TMP41]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 39), align 4 -; CLEANUP-NEXT: store i32 [[TMP42]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 40), align 4 -; CLEANUP-NEXT: store i32 [[TMP43]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 41), align 4 -; CLEANUP-NEXT: store i32 [[TMP44]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 42), 
align 4 -; CLEANUP-NEXT: store i32 [[TMP45]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 43), align 4 -; CLEANUP-NEXT: store i32 [[TMP46]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 44), align 4 -; CLEANUP-NEXT: store i32 [[TMP47]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 45), align 4 -; CLEANUP-NEXT: store i32 [[TMP48]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 46), align 4 -; CLEANUP-NEXT: store i32 [[TMP49]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 47), align 4 -; CLEANUP-NEXT: store i32 [[TMP50]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 48), align 4 -; CLEANUP-NEXT: store i32 [[TMP51]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 49), align 4 -; CLEANUP-NEXT: store i32 [[TMP52]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 50), align 4 -; CLEANUP-NEXT: store i32 [[TMP53]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 51), align 4 -; CLEANUP-NEXT: store i32 [[TMP54]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 52), align 4 -; CLEANUP-NEXT: store i32 [[TMP55]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 53), align 4 -; CLEANUP-NEXT: store i32 [[TMP56]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 54), align 4 -; 
CLEANUP-NEXT: store i32 [[TMP57]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 55), align 4 -; CLEANUP-NEXT: store i32 [[TMP58]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 56), align 4 -; CLEANUP-NEXT: [[TMP59:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CLEANUP-NEXT: [[TMP60:%.*]] = load i32, ptr [[TMP59]], align 4 -; CLEANUP-NEXT: [[TMP61:%.*]] = add i32 [[TMP60]], -108 -; CLEANUP-NEXT: store i32 [[TMP61]], ptr [[TMP59]], align 4 +; CLEANUP-NEXT: store i32 [[TMP2]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 +; CLEANUP-NEXT: store i32 [[TMP3]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 +; CLEANUP-NEXT: store i32 [[TMP4]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 +; CLEANUP-NEXT: store i32 [[TMP5]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 10), align 4 +; CLEANUP-NEXT: store i32 [[TMP6]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 11), align 4 +; CLEANUP-NEXT: store i32 [[TMP7]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 12), align 4 +; CLEANUP-NEXT: store i32 [[TMP8]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 13), align 4 +; CLEANUP-NEXT: store i32 [[TMP9]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 14), align 4 +; CLEANUP-NEXT: store i32 [[TMP10]], ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 15), align 4 +; CLEANUP-NEXT: store i32 [[TMP11]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 16), align 4 +; CLEANUP-NEXT: store i32 [[TMP12]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 17), align 4 +; CLEANUP-NEXT: store i32 [[TMP13]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 18), align 4 +; CLEANUP-NEXT: store i32 [[TMP14]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 19), align 4 +; CLEANUP-NEXT: store i32 [[TMP15]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 20), align 4 +; CLEANUP-NEXT: store i32 [[TMP16]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 21), align 4 +; CLEANUP-NEXT: store i32 [[TMP17]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 22), align 4 +; CLEANUP-NEXT: store i32 [[TMP18]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 23), align 4 +; CLEANUP-NEXT: store i32 [[TMP19]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 24), align 4 +; CLEANUP-NEXT: store i32 [[TMP20]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 25), align 4 +; CLEANUP-NEXT: store i32 [[TMP21]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 26), align 4 +; CLEANUP-NEXT: store i32 [[TMP22]], ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 27), align 4 +; CLEANUP-NEXT: store i32 [[TMP23]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 28), align 4 +; CLEANUP-NEXT: store i32 [[TMP24]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 29), align 4 +; CLEANUP-NEXT: store i32 [[TMP25]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 30), align 4 +; CLEANUP-NEXT: store i32 [[TMP26]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 31), align 4 +; CLEANUP-NEXT: store i32 [[TMP27]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 32), align 4 +; CLEANUP-NEXT: store i32 [[TMP28]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 33), align 4 +; CLEANUP-NEXT: store i32 [[TMP29]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 34), align 4 +; CLEANUP-NEXT: store i32 [[TMP30]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 35), align 4 +; CLEANUP-NEXT: store i32 [[TMP31]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 36), align 4 +; CLEANUP-NEXT: store i32 [[TMP32]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 37), align 4 +; CLEANUP-NEXT: store i32 [[TMP33]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 38), align 4 +; CLEANUP-NEXT: store i32 [[TMP34]], ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 39), align 4 +; CLEANUP-NEXT: store i32 [[TMP35]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 40), align 4 +; CLEANUP-NEXT: store i32 [[TMP36]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 41), align 4 +; CLEANUP-NEXT: store i32 [[TMP37]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 42), align 4 +; CLEANUP-NEXT: store i32 [[TMP38]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 43), align 4 +; CLEANUP-NEXT: store i32 [[TMP39]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 44), align 4 +; CLEANUP-NEXT: store i32 [[TMP40]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 45), align 4 +; CLEANUP-NEXT: store i32 [[TMP41]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 46), align 4 +; CLEANUP-NEXT: store i32 [[TMP42]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 47), align 4 +; CLEANUP-NEXT: store i32 [[TMP43]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 48), align 4 +; CLEANUP-NEXT: store i32 [[TMP44]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 49), align 4 +; CLEANUP-NEXT: store i32 [[TMP45]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 50), align 4 +; CLEANUP-NEXT: store i32 [[TMP46]], ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 51), align 4 +; CLEANUP-NEXT: store i32 [[TMP47]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 52), align 4 +; CLEANUP-NEXT: store i32 [[TMP48]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 53), align 4 +; CLEANUP-NEXT: store i32 [[TMP49]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 54), align 4 +; CLEANUP-NEXT: store i32 [[TMP50]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 55), align 4 +; CLEANUP-NEXT: store i32 [[TMP51]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 56), align 4 ; CLEANUP-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT]], 0 -; CLEANUP-NEXT: [[TMP62:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CLEANUP-NEXT: [[TMP63:%.*]] = load i32, ptr [[TMP62]], align 4 -; CLEANUP-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP63]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META18]] +; CLEANUP-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR_RELOAD]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META18]] ; CLEANUP-NEXT: unreachable ; ; @@ -828,13 +797,12 @@ attributes #3 = { nounwind } ; POST-PROCESS-SAME: ) !lgc.rt.shaderstage [[META8:![0-9]+]] !continuation.entry [[META19:![0-9]+]] !continuation.registercount [[META8]] !continuation [[META20:![0-9]+]] !continuation.stacksize [[META21:![0-9]+]] !continuation.state [[META8]] { ; POST-PROCESS-NEXT: AllocaSpillBB: ; POST-PROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 -; POST-PROCESS-NEXT: [[SYSTEM_DATA:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA:%.*]] @_cont_SetupRayGen() ; POST-PROCESS-NEXT: [[TMP0:%.*]] = call i32 @_cont_GetContinuationStackAddr() ; POST-PROCESS-NEXT: store i32 [[TMP0]], ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[SYSTEM_DATA]], 0 +; POST-PROCESS-NEXT: [[SYSTEM_DATA:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA:%.*]] @_cont_SetupRayGen() ; POST-PROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 108 -; POST-PROCESS-NEXT: store i32 [[TMP2]], ptr [[CSP]], align 4 +; POST-PROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 0 +; POST-PROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[SYSTEM_DATA]], 0 ; POST-PROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; POST-PROCESS-NEXT: [[TMP3:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 ; POST-PROCESS-NEXT: [[TMP4:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 @@ -844,9 +812,7 @@ attributes #3 = { nounwind } ; POST-PROCESS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT]], 0 ; POST-PROCESS-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] undef, 
[[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]], 0 ; POST-PROCESS-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 -; POST-PROCESS-NEXT: [[TMP8:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], -108 -; POST-PROCESS-NEXT: store i32 [[TMP9]], ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: store i32 [[TMP2]], ptr addrspace(20) @REGISTERS, align 4 ; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 ; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 ; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 @@ -870,316 +836,376 @@ attributes #3 = { nounwind } ; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 27) to ptr addrspace(20)), align 4 ; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 28) to ptr addrspace(20)), align 4 ; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr 
addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 29) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], -120 +; POST-PROCESS-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], 120 ; POST-PROCESS-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP10]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP11]], i32 -30 -; POST-PROCESS-NEXT: [[TMP13:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP12]], i32 0, i32 0, i64 30 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP13]], align 4 -; POST-PROCESS-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP15:%.*]] = inttoptr i32 [[TMP14]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP15]], i32 -30 -; POST-PROCESS-NEXT: [[TMP17:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP16]], i32 0, i32 0, i64 31 +; POST-PROCESS-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP11]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP12]], align 4 +; POST-PROCESS-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP14:%.*]] = add i32 [[TMP13]], -120 +; POST-PROCESS-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], 124 +; POST-PROCESS-NEXT: [[TMP16:%.*]] = inttoptr i32 [[TMP15]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP16]], i32 0 ; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP17]], align 4 ; POST-PROCESS-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP19:%.*]] = inttoptr i32 
[[TMP18]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP19]], i32 -30 -; POST-PROCESS-NEXT: [[TMP21:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP20]], i32 0, i32 0, i64 32 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP21]], align 4 -; POST-PROCESS-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP22]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP23]], i32 -30 -; POST-PROCESS-NEXT: [[TMP25:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP24]], i32 0, i32 0, i64 33 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP25]], align 4 -; POST-PROCESS-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP27:%.*]] = inttoptr i32 [[TMP26]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP27]], i32 -30 -; POST-PROCESS-NEXT: [[TMP29:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP28]], i32 0, i32 0, i64 34 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP29]], align 4 -; POST-PROCESS-NEXT: [[TMP30:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP19:%.*]] = add i32 [[TMP18]], -120 +; POST-PROCESS-NEXT: [[TMP20:%.*]] = add i32 [[TMP19]], 128 +; POST-PROCESS-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP20]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP21]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP22]], align 4 +; POST-PROCESS-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP24:%.*]] = add i32 [[TMP23]], -120 +; POST-PROCESS-NEXT: 
[[TMP25:%.*]] = add i32 [[TMP24]], 132 +; POST-PROCESS-NEXT: [[TMP26:%.*]] = inttoptr i32 [[TMP25]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP26]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP27]], align 4 +; POST-PROCESS-NEXT: [[TMP28:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP29:%.*]] = add i32 [[TMP28]], -120 +; POST-PROCESS-NEXT: [[TMP30:%.*]] = add i32 [[TMP29]], 136 ; POST-PROCESS-NEXT: [[TMP31:%.*]] = inttoptr i32 [[TMP30]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP32:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP31]], i32 -30 -; POST-PROCESS-NEXT: [[TMP33:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP32]], i32 0, i32 0, i64 35 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP33]], align 4 -; POST-PROCESS-NEXT: [[TMP34:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP35:%.*]] = inttoptr i32 [[TMP34]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP36:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP35]], i32 -30 -; POST-PROCESS-NEXT: [[TMP37:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP36]], i32 0, i32 0, i64 36 +; POST-PROCESS-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP31]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP32]], align 4 +; POST-PROCESS-NEXT: [[TMP33:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP34:%.*]] = add i32 [[TMP33]], -120 +; POST-PROCESS-NEXT: [[TMP35:%.*]] = add i32 [[TMP34]], 140 +; POST-PROCESS-NEXT: [[TMP36:%.*]] = inttoptr i32 [[TMP35]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP36]], i32 0 ; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP37]], align 4 ; POST-PROCESS-NEXT: [[TMP38:%.*]] = load 
i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP39:%.*]] = inttoptr i32 [[TMP38]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP40:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP39]], i32 -30 -; POST-PROCESS-NEXT: [[TMP41:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP40]], i32 0, i32 0, i64 37 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP41]], align 4 -; POST-PROCESS-NEXT: [[TMP42:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP43:%.*]] = inttoptr i32 [[TMP42]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP44:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP43]], i32 -30 -; POST-PROCESS-NEXT: [[TMP45:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP44]], i32 0, i32 0, i64 38 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP45]], align 4 -; POST-PROCESS-NEXT: [[TMP46:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP47:%.*]] = inttoptr i32 [[TMP46]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP48:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP47]], i32 -30 -; POST-PROCESS-NEXT: [[TMP49:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP48]], i32 0, i32 0, i64 39 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP49]], align 4 -; POST-PROCESS-NEXT: [[TMP50:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP39:%.*]] = add i32 [[TMP38]], -120 +; POST-PROCESS-NEXT: [[TMP40:%.*]] = add i32 [[TMP39]], 144 +; POST-PROCESS-NEXT: [[TMP41:%.*]] = inttoptr i32 [[TMP40]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP42:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP41]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP42]], align 4 +; POST-PROCESS-NEXT: [[TMP43:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 
+; POST-PROCESS-NEXT: [[TMP44:%.*]] = add i32 [[TMP43]], -120 +; POST-PROCESS-NEXT: [[TMP45:%.*]] = add i32 [[TMP44]], 148 +; POST-PROCESS-NEXT: [[TMP46:%.*]] = inttoptr i32 [[TMP45]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP47:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP46]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP47]], align 4 +; POST-PROCESS-NEXT: [[TMP48:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP49:%.*]] = add i32 [[TMP48]], -120 +; POST-PROCESS-NEXT: [[TMP50:%.*]] = add i32 [[TMP49]], 152 ; POST-PROCESS-NEXT: [[TMP51:%.*]] = inttoptr i32 [[TMP50]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP52:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP51]], i32 -30 -; POST-PROCESS-NEXT: [[TMP53:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP52]], i32 0, i32 0, i64 40 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP53]], align 4 -; POST-PROCESS-NEXT: [[TMP54:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP55:%.*]] = inttoptr i32 [[TMP54]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP56:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP55]], i32 -30 -; POST-PROCESS-NEXT: [[TMP57:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP56]], i32 0, i32 0, i64 41 +; POST-PROCESS-NEXT: [[TMP52:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP51]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP52]], align 4 +; POST-PROCESS-NEXT: [[TMP53:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP54:%.*]] = add i32 [[TMP53]], -120 +; POST-PROCESS-NEXT: [[TMP55:%.*]] = add i32 [[TMP54]], 156 +; POST-PROCESS-NEXT: [[TMP56:%.*]] = inttoptr i32 [[TMP55]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP57:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP56]], i32 0 ; POST-PROCESS-NEXT: store i32 
undef, ptr addrspace(21) [[TMP57]], align 4 ; POST-PROCESS-NEXT: [[TMP58:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP59:%.*]] = inttoptr i32 [[TMP58]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP60:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP59]], i32 -30 -; POST-PROCESS-NEXT: [[TMP61:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP60]], i32 0, i32 0, i64 42 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP61]], align 4 -; POST-PROCESS-NEXT: [[TMP62:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP63:%.*]] = inttoptr i32 [[TMP62]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP63]], i32 -30 -; POST-PROCESS-NEXT: [[TMP65:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP64]], i32 0, i32 0, i64 43 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP65]], align 4 -; POST-PROCESS-NEXT: [[TMP66:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP67:%.*]] = inttoptr i32 [[TMP66]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP68:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP67]], i32 -30 -; POST-PROCESS-NEXT: [[TMP69:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP68]], i32 0, i32 0, i64 44 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP69]], align 4 -; POST-PROCESS-NEXT: [[TMP70:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP59:%.*]] = add i32 [[TMP58]], -120 +; POST-PROCESS-NEXT: [[TMP60:%.*]] = add i32 [[TMP59]], 160 +; POST-PROCESS-NEXT: [[TMP61:%.*]] = inttoptr i32 [[TMP60]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP62:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP61]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP62]], align 4 
+; POST-PROCESS-NEXT: [[TMP63:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP64:%.*]] = add i32 [[TMP63]], -120 +; POST-PROCESS-NEXT: [[TMP65:%.*]] = add i32 [[TMP64]], 164 +; POST-PROCESS-NEXT: [[TMP66:%.*]] = inttoptr i32 [[TMP65]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP67:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP66]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP67]], align 4 +; POST-PROCESS-NEXT: [[TMP68:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP69:%.*]] = add i32 [[TMP68]], -120 +; POST-PROCESS-NEXT: [[TMP70:%.*]] = add i32 [[TMP69]], 168 ; POST-PROCESS-NEXT: [[TMP71:%.*]] = inttoptr i32 [[TMP70]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP72:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP71]], i32 -30 -; POST-PROCESS-NEXT: [[TMP73:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP72]], i32 0, i32 0, i64 45 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP73]], align 4 -; POST-PROCESS-NEXT: [[TMP74:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP75:%.*]] = inttoptr i32 [[TMP74]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP76:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP75]], i32 -30 -; POST-PROCESS-NEXT: [[TMP77:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP76]], i32 0, i32 0, i64 46 +; POST-PROCESS-NEXT: [[TMP72:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP71]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP72]], align 4 +; POST-PROCESS-NEXT: [[TMP73:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP74:%.*]] = add i32 [[TMP73]], -120 +; POST-PROCESS-NEXT: [[TMP75:%.*]] = add i32 [[TMP74]], 172 +; POST-PROCESS-NEXT: [[TMP76:%.*]] = inttoptr i32 [[TMP75]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP77:%.*]] = 
getelementptr i8, ptr addrspace(21) [[TMP76]], i32 0 ; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP77]], align 4 ; POST-PROCESS-NEXT: [[TMP78:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP79:%.*]] = inttoptr i32 [[TMP78]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP80:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP79]], i32 -30 -; POST-PROCESS-NEXT: [[TMP81:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP80]], i32 0, i32 0, i64 47 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP81]], align 4 -; POST-PROCESS-NEXT: [[TMP82:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP83:%.*]] = inttoptr i32 [[TMP82]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP84:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP83]], i32 -30 -; POST-PROCESS-NEXT: [[TMP85:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP84]], i32 0, i32 0, i64 48 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP85]], align 4 -; POST-PROCESS-NEXT: [[TMP86:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP87:%.*]] = inttoptr i32 [[TMP86]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP88:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP87]], i32 -30 -; POST-PROCESS-NEXT: [[TMP89:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP88]], i32 0, i32 0, i64 49 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP89]], align 4 -; POST-PROCESS-NEXT: [[TMP90:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP79:%.*]] = add i32 [[TMP78]], -120 +; POST-PROCESS-NEXT: [[TMP80:%.*]] = add i32 [[TMP79]], 176 +; POST-PROCESS-NEXT: [[TMP81:%.*]] = inttoptr i32 [[TMP80]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP82:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP81]], 
i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP82]], align 4 +; POST-PROCESS-NEXT: [[TMP83:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP84:%.*]] = add i32 [[TMP83]], -120 +; POST-PROCESS-NEXT: [[TMP85:%.*]] = add i32 [[TMP84]], 180 +; POST-PROCESS-NEXT: [[TMP86:%.*]] = inttoptr i32 [[TMP85]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP87:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP86]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP87]], align 4 +; POST-PROCESS-NEXT: [[TMP88:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP89:%.*]] = add i32 [[TMP88]], -120 +; POST-PROCESS-NEXT: [[TMP90:%.*]] = add i32 [[TMP89]], 184 ; POST-PROCESS-NEXT: [[TMP91:%.*]] = inttoptr i32 [[TMP90]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP92:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP91]], i32 -30 -; POST-PROCESS-NEXT: [[TMP93:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP92]], i32 0, i32 0, i64 50 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP93]], align 4 -; POST-PROCESS-NEXT: [[TMP94:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP95:%.*]] = inttoptr i32 [[TMP94]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP96:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP95]], i32 -30 -; POST-PROCESS-NEXT: [[TMP97:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP96]], i32 0, i32 0, i64 51 +; POST-PROCESS-NEXT: [[TMP92:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP91]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP92]], align 4 +; POST-PROCESS-NEXT: [[TMP93:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP94:%.*]] = add i32 [[TMP93]], -120 +; POST-PROCESS-NEXT: [[TMP95:%.*]] = add i32 [[TMP94]], 188 +; POST-PROCESS-NEXT: [[TMP96:%.*]] = 
inttoptr i32 [[TMP95]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP97:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP96]], i32 0 ; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP97]], align 4 ; POST-PROCESS-NEXT: [[TMP98:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP99:%.*]] = inttoptr i32 [[TMP98]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP100:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP99]], i32 -30 -; POST-PROCESS-NEXT: [[TMP101:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP100]], i32 0, i32 0, i64 52 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP101]], align 4 -; POST-PROCESS-NEXT: [[TMP102:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP103:%.*]] = inttoptr i32 [[TMP102]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP104:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP103]], i32 -30 -; POST-PROCESS-NEXT: [[TMP105:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP104]], i32 0, i32 0, i64 53 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP105]], align 4 -; POST-PROCESS-NEXT: [[TMP106:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP107:%.*]] = inttoptr i32 [[TMP106]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP108:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP107]], i32 -30 -; POST-PROCESS-NEXT: [[TMP109:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP108]], i32 0, i32 0, i64 54 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP109]], align 4 -; POST-PROCESS-NEXT: [[TMP110:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP99:%.*]] = add i32 [[TMP98]], -120 +; POST-PROCESS-NEXT: [[TMP100:%.*]] = add i32 [[TMP99]], 192 +; POST-PROCESS-NEXT: [[TMP101:%.*]] = inttoptr i32 [[TMP100]] 
to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP102:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP101]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP102]], align 4 +; POST-PROCESS-NEXT: [[TMP103:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP104:%.*]] = add i32 [[TMP103]], -120 +; POST-PROCESS-NEXT: [[TMP105:%.*]] = add i32 [[TMP104]], 196 +; POST-PROCESS-NEXT: [[TMP106:%.*]] = inttoptr i32 [[TMP105]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP107:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP106]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP107]], align 4 +; POST-PROCESS-NEXT: [[TMP108:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP109:%.*]] = add i32 [[TMP108]], -120 +; POST-PROCESS-NEXT: [[TMP110:%.*]] = add i32 [[TMP109]], 200 ; POST-PROCESS-NEXT: [[TMP111:%.*]] = inttoptr i32 [[TMP110]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP112:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP111]], i32 -30 -; POST-PROCESS-NEXT: [[TMP113:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP112]], i32 0, i32 0, i64 55 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP113]], align 4 -; POST-PROCESS-NEXT: [[TMP114:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP115:%.*]] = inttoptr i32 [[TMP114]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP116:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP115]], i32 -30 -; POST-PROCESS-NEXT: [[TMP117:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP116]], i32 0, i32 0, i64 56 +; POST-PROCESS-NEXT: [[TMP112:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP111]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP112]], align 4 +; POST-PROCESS-NEXT: [[TMP113:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; 
POST-PROCESS-NEXT: [[TMP114:%.*]] = add i32 [[TMP113]], -120 +; POST-PROCESS-NEXT: [[TMP115:%.*]] = add i32 [[TMP114]], 204 +; POST-PROCESS-NEXT: [[TMP116:%.*]] = inttoptr i32 [[TMP115]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP117:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP116]], i32 0 ; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP117]], align 4 -; POST-PROCESS-NEXT: [[TMP118:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: [[TMP119:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @main.resume.0 to i64)) -; POST-PROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 4, i32 [[TMP118]], i64 [[TMP119]], [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]]), !continuation.registercount [[META17:![0-9]+]], !continuation.returnedRegistercount !17 +; POST-PROCESS-NEXT: [[TMP118:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP119:%.*]] = add i32 [[TMP118]], -120 +; POST-PROCESS-NEXT: [[TMP120:%.*]] = add i32 [[TMP119]], 208 +; POST-PROCESS-NEXT: [[TMP121:%.*]] = inttoptr i32 [[TMP120]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP122:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP121]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP122]], align 4 +; POST-PROCESS-NEXT: [[TMP123:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP124:%.*]] = add i32 [[TMP123]], -120 +; POST-PROCESS-NEXT: [[TMP125:%.*]] = add i32 [[TMP124]], 212 +; POST-PROCESS-NEXT: [[TMP126:%.*]] = inttoptr i32 [[TMP125]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP127:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP126]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP127]], align 4 +; POST-PROCESS-NEXT: [[TMP128:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP129:%.*]] = add i32 [[TMP128]], -120 +; POST-PROCESS-NEXT: [[TMP130:%.*]] = add i32 [[TMP129]], 216 +; POST-PROCESS-NEXT: [[TMP131:%.*]] = inttoptr 
i32 [[TMP130]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP132:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP131]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP132]], align 4 +; POST-PROCESS-NEXT: [[TMP133:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP134:%.*]] = add i32 [[TMP133]], -120 +; POST-PROCESS-NEXT: [[TMP135:%.*]] = add i32 [[TMP134]], 220 +; POST-PROCESS-NEXT: [[TMP136:%.*]] = inttoptr i32 [[TMP135]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP137:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP136]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP137]], align 4 +; POST-PROCESS-NEXT: [[TMP138:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP139:%.*]] = add i32 [[TMP138]], -120 +; POST-PROCESS-NEXT: [[TMP140:%.*]] = add i32 [[TMP139]], 224 +; POST-PROCESS-NEXT: [[TMP141:%.*]] = inttoptr i32 [[TMP140]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP142:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP141]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP142]], align 4 +; POST-PROCESS-NEXT: [[TMP143:%.*]] = load i32, ptr [[CSP]], align 4 +; POST-PROCESS-NEXT: [[TMP144:%.*]] = add i32 [[TMP143]], 108 +; POST-PROCESS-NEXT: store i32 [[TMP144]], ptr [[CSP]], align 4 +; POST-PROCESS-NEXT: [[TMP145:%.*]] = load i32, ptr [[CSP]], align 4 +; POST-PROCESS-NEXT: [[TMP146:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @main.resume.0 to i64)) +; POST-PROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 4, i32 [[TMP145]], i64 [[TMP146]], [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]]), !continuation.registercount [[META17:![0-9]+]], !continuation.returnedRegistercount !17 ; POST-PROCESS-NEXT: unreachable ; ; ; POST-PROCESS-LABEL: define dso_local void @main.resume.0( -; POST-PROCESS-SAME: i32 [[TMP0:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP1:%.*]]) !lgc.rt.shaderstage [[META8]] !continuation.registercount [[META17]] !continuation [[META20]] { +; POST-PROCESS-SAME: i32 [[CSPINIT:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META8]] !continuation.registercount [[META17]] !continuation [[META20]] { ; POST-PROCESS-NEXT: entryresume.0: ; POST-PROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 -; POST-PROCESS-NEXT: store i32 [[TMP0]], ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 10) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr 
getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 11) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 12) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 13) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 14) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 15) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 16) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 17) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr 
addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 18) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 19) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 20) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 21) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 22) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 23) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 24) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 25) to ptr 
addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 26) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 27) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 28) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 29) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP26:%.*]] = inttoptr i32 [[TMP25]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP27:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP26]], i32 -30 -; POST-PROCESS-NEXT: [[TMP28:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP27]], i32 0, i32 0, i64 30 -; POST-PROCESS-NEXT: [[TMP29:%.*]] = load i32, ptr addrspace(21) [[TMP28]], align 4 -; POST-PROCESS-NEXT: [[TMP30:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; POST-PROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 +; POST-PROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], -108 +; POST-PROCESS-NEXT: store i32 [[TMP2]], ptr [[CSP]], align 4 +; POST-PROCESS-NEXT: 
[[TMP3:%.*]] = load i32, ptr [[CSP]], align 4 +; POST-PROCESS-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 0 +; POST-PROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 10) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 11) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 12) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 13) to ptr 
addrspace(20)), align 4 +; POST-PROCESS-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 14) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 15) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 16) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 17) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 18) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 19) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 20) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: [[TMP19:%.*]] = load i32, ptr 
addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 21) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 22) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 23) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 24) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 25) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 26) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 27) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 28) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 29) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: [[TMP28:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP29:%.*]] = add i32 [[TMP28]], -120 +; POST-PROCESS-NEXT: [[TMP30:%.*]] = add i32 [[TMP29]], 120 ; POST-PROCESS-NEXT: [[TMP31:%.*]] = inttoptr i32 [[TMP30]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP32:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP31]], i32 -30 -; POST-PROCESS-NEXT: [[TMP33:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP32]], i32 0, i32 0, i64 31 -; POST-PROCESS-NEXT: [[TMP34:%.*]] = load i32, ptr addrspace(21) [[TMP33]], align 4 -; POST-PROCESS-NEXT: [[TMP35:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP36:%.*]] = inttoptr i32 [[TMP35]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP37:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP36]], i32 -30 -; POST-PROCESS-NEXT: [[TMP38:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP37]], i32 0, i32 0, i64 32 +; POST-PROCESS-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP31]], i32 0 +; POST-PROCESS-NEXT: [[TMP33:%.*]] = load i32, ptr addrspace(21) [[TMP32]], align 4 +; POST-PROCESS-NEXT: [[TMP34:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP35:%.*]] = add i32 [[TMP34]], -120 +; POST-PROCESS-NEXT: [[TMP36:%.*]] = add i32 [[TMP35]], 124 +; POST-PROCESS-NEXT: [[TMP37:%.*]] = inttoptr i32 [[TMP36]] to ptr addrspace(21) +; POST-PROCESS-NEXT: 
[[TMP38:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP37]], i32 0 ; POST-PROCESS-NEXT: [[TMP39:%.*]] = load i32, ptr addrspace(21) [[TMP38]], align 4 ; POST-PROCESS-NEXT: [[TMP40:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP41:%.*]] = inttoptr i32 [[TMP40]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP42:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP41]], i32 -30 -; POST-PROCESS-NEXT: [[TMP43:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP42]], i32 0, i32 0, i64 33 -; POST-PROCESS-NEXT: [[TMP44:%.*]] = load i32, ptr addrspace(21) [[TMP43]], align 4 -; POST-PROCESS-NEXT: [[TMP45:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP46:%.*]] = inttoptr i32 [[TMP45]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP47:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP46]], i32 -30 -; POST-PROCESS-NEXT: [[TMP48:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP47]], i32 0, i32 0, i64 34 -; POST-PROCESS-NEXT: [[TMP49:%.*]] = load i32, ptr addrspace(21) [[TMP48]], align 4 -; POST-PROCESS-NEXT: [[TMP50:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP51:%.*]] = inttoptr i32 [[TMP50]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP52:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP51]], i32 -30 -; POST-PROCESS-NEXT: [[TMP53:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP52]], i32 0, i32 0, i64 35 -; POST-PROCESS-NEXT: [[TMP54:%.*]] = load i32, ptr addrspace(21) [[TMP53]], align 4 -; POST-PROCESS-NEXT: [[TMP55:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP56:%.*]] = inttoptr i32 [[TMP55]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP57:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP56]], i32 -30 -; POST-PROCESS-NEXT: [[TMP58:%.*]] = 
getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP57]], i32 0, i32 0, i64 36 -; POST-PROCESS-NEXT: [[TMP59:%.*]] = load i32, ptr addrspace(21) [[TMP58]], align 4 -; POST-PROCESS-NEXT: [[TMP60:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP41:%.*]] = add i32 [[TMP40]], -120 +; POST-PROCESS-NEXT: [[TMP42:%.*]] = add i32 [[TMP41]], 128 +; POST-PROCESS-NEXT: [[TMP43:%.*]] = inttoptr i32 [[TMP42]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP43]], i32 0 +; POST-PROCESS-NEXT: [[TMP45:%.*]] = load i32, ptr addrspace(21) [[TMP44]], align 4 +; POST-PROCESS-NEXT: [[TMP46:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP47:%.*]] = add i32 [[TMP46]], -120 +; POST-PROCESS-NEXT: [[TMP48:%.*]] = add i32 [[TMP47]], 132 +; POST-PROCESS-NEXT: [[TMP49:%.*]] = inttoptr i32 [[TMP48]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP49]], i32 0 +; POST-PROCESS-NEXT: [[TMP51:%.*]] = load i32, ptr addrspace(21) [[TMP50]], align 4 +; POST-PROCESS-NEXT: [[TMP52:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP53:%.*]] = add i32 [[TMP52]], -120 +; POST-PROCESS-NEXT: [[TMP54:%.*]] = add i32 [[TMP53]], 136 +; POST-PROCESS-NEXT: [[TMP55:%.*]] = inttoptr i32 [[TMP54]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP56:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP55]], i32 0 +; POST-PROCESS-NEXT: [[TMP57:%.*]] = load i32, ptr addrspace(21) [[TMP56]], align 4 +; POST-PROCESS-NEXT: [[TMP58:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP59:%.*]] = add i32 [[TMP58]], -120 +; POST-PROCESS-NEXT: [[TMP60:%.*]] = add i32 [[TMP59]], 140 ; POST-PROCESS-NEXT: [[TMP61:%.*]] = inttoptr i32 [[TMP60]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP62:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP61]], i32 -30 -; 
POST-PROCESS-NEXT: [[TMP63:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP62]], i32 0, i32 0, i64 37 -; POST-PROCESS-NEXT: [[TMP64:%.*]] = load i32, ptr addrspace(21) [[TMP63]], align 4 -; POST-PROCESS-NEXT: [[TMP65:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP66:%.*]] = inttoptr i32 [[TMP65]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP67:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP66]], i32 -30 -; POST-PROCESS-NEXT: [[TMP68:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP67]], i32 0, i32 0, i64 38 +; POST-PROCESS-NEXT: [[TMP62:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP61]], i32 0 +; POST-PROCESS-NEXT: [[TMP63:%.*]] = load i32, ptr addrspace(21) [[TMP62]], align 4 +; POST-PROCESS-NEXT: [[TMP64:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP65:%.*]] = add i32 [[TMP64]], -120 +; POST-PROCESS-NEXT: [[TMP66:%.*]] = add i32 [[TMP65]], 144 +; POST-PROCESS-NEXT: [[TMP67:%.*]] = inttoptr i32 [[TMP66]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP68:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP67]], i32 0 ; POST-PROCESS-NEXT: [[TMP69:%.*]] = load i32, ptr addrspace(21) [[TMP68]], align 4 ; POST-PROCESS-NEXT: [[TMP70:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP71:%.*]] = inttoptr i32 [[TMP70]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP72:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP71]], i32 -30 -; POST-PROCESS-NEXT: [[TMP73:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP72]], i32 0, i32 0, i64 39 -; POST-PROCESS-NEXT: [[TMP74:%.*]] = load i32, ptr addrspace(21) [[TMP73]], align 4 -; POST-PROCESS-NEXT: [[TMP75:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP76:%.*]] = inttoptr i32 [[TMP75]] to ptr addrspace(21) -; 
POST-PROCESS-NEXT: [[TMP77:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP76]], i32 -30 -; POST-PROCESS-NEXT: [[TMP78:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP77]], i32 0, i32 0, i64 40 -; POST-PROCESS-NEXT: [[TMP79:%.*]] = load i32, ptr addrspace(21) [[TMP78]], align 4 -; POST-PROCESS-NEXT: [[TMP80:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP81:%.*]] = inttoptr i32 [[TMP80]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP82:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP81]], i32 -30 -; POST-PROCESS-NEXT: [[TMP83:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP82]], i32 0, i32 0, i64 41 -; POST-PROCESS-NEXT: [[TMP84:%.*]] = load i32, ptr addrspace(21) [[TMP83]], align 4 -; POST-PROCESS-NEXT: [[TMP85:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP86:%.*]] = inttoptr i32 [[TMP85]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP87:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP86]], i32 -30 -; POST-PROCESS-NEXT: [[TMP88:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP87]], i32 0, i32 0, i64 42 -; POST-PROCESS-NEXT: [[TMP89:%.*]] = load i32, ptr addrspace(21) [[TMP88]], align 4 -; POST-PROCESS-NEXT: [[TMP90:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP71:%.*]] = add i32 [[TMP70]], -120 +; POST-PROCESS-NEXT: [[TMP72:%.*]] = add i32 [[TMP71]], 148 +; POST-PROCESS-NEXT: [[TMP73:%.*]] = inttoptr i32 [[TMP72]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP74:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP73]], i32 0 +; POST-PROCESS-NEXT: [[TMP75:%.*]] = load i32, ptr addrspace(21) [[TMP74]], align 4 +; POST-PROCESS-NEXT: [[TMP76:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP77:%.*]] = add i32 [[TMP76]], -120 +; 
POST-PROCESS-NEXT: [[TMP78:%.*]] = add i32 [[TMP77]], 152 +; POST-PROCESS-NEXT: [[TMP79:%.*]] = inttoptr i32 [[TMP78]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP80:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP79]], i32 0 +; POST-PROCESS-NEXT: [[TMP81:%.*]] = load i32, ptr addrspace(21) [[TMP80]], align 4 +; POST-PROCESS-NEXT: [[TMP82:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP83:%.*]] = add i32 [[TMP82]], -120 +; POST-PROCESS-NEXT: [[TMP84:%.*]] = add i32 [[TMP83]], 156 +; POST-PROCESS-NEXT: [[TMP85:%.*]] = inttoptr i32 [[TMP84]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP86:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP85]], i32 0 +; POST-PROCESS-NEXT: [[TMP87:%.*]] = load i32, ptr addrspace(21) [[TMP86]], align 4 +; POST-PROCESS-NEXT: [[TMP88:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP89:%.*]] = add i32 [[TMP88]], -120 +; POST-PROCESS-NEXT: [[TMP90:%.*]] = add i32 [[TMP89]], 160 ; POST-PROCESS-NEXT: [[TMP91:%.*]] = inttoptr i32 [[TMP90]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP92:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP91]], i32 -30 -; POST-PROCESS-NEXT: [[TMP93:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP92]], i32 0, i32 0, i64 43 -; POST-PROCESS-NEXT: [[TMP94:%.*]] = load i32, ptr addrspace(21) [[TMP93]], align 4 -; POST-PROCESS-NEXT: [[TMP95:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP96:%.*]] = inttoptr i32 [[TMP95]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP97:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP96]], i32 -30 -; POST-PROCESS-NEXT: [[TMP98:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP97]], i32 0, i32 0, i64 44 +; POST-PROCESS-NEXT: [[TMP92:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP91]], i32 0 +; POST-PROCESS-NEXT: [[TMP93:%.*]] = load i32, ptr 
addrspace(21) [[TMP92]], align 4 +; POST-PROCESS-NEXT: [[TMP94:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP95:%.*]] = add i32 [[TMP94]], -120 +; POST-PROCESS-NEXT: [[TMP96:%.*]] = add i32 [[TMP95]], 164 +; POST-PROCESS-NEXT: [[TMP97:%.*]] = inttoptr i32 [[TMP96]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP98:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP97]], i32 0 ; POST-PROCESS-NEXT: [[TMP99:%.*]] = load i32, ptr addrspace(21) [[TMP98]], align 4 ; POST-PROCESS-NEXT: [[TMP100:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP101:%.*]] = inttoptr i32 [[TMP100]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP102:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP101]], i32 -30 -; POST-PROCESS-NEXT: [[TMP103:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP102]], i32 0, i32 0, i64 45 -; POST-PROCESS-NEXT: [[TMP104:%.*]] = load i32, ptr addrspace(21) [[TMP103]], align 4 -; POST-PROCESS-NEXT: [[TMP105:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP106:%.*]] = inttoptr i32 [[TMP105]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP107:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP106]], i32 -30 -; POST-PROCESS-NEXT: [[TMP108:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP107]], i32 0, i32 0, i64 46 -; POST-PROCESS-NEXT: [[TMP109:%.*]] = load i32, ptr addrspace(21) [[TMP108]], align 4 -; POST-PROCESS-NEXT: [[TMP110:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP111:%.*]] = inttoptr i32 [[TMP110]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP112:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP111]], i32 -30 -; POST-PROCESS-NEXT: [[TMP113:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP112]], i32 0, i32 0, i64 47 -; POST-PROCESS-NEXT: 
[[TMP114:%.*]] = load i32, ptr addrspace(21) [[TMP113]], align 4 -; POST-PROCESS-NEXT: [[TMP115:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP116:%.*]] = inttoptr i32 [[TMP115]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP117:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP116]], i32 -30 -; POST-PROCESS-NEXT: [[TMP118:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP117]], i32 0, i32 0, i64 48 -; POST-PROCESS-NEXT: [[TMP119:%.*]] = load i32, ptr addrspace(21) [[TMP118]], align 4 -; POST-PROCESS-NEXT: [[TMP120:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP101:%.*]] = add i32 [[TMP100]], -120 +; POST-PROCESS-NEXT: [[TMP102:%.*]] = add i32 [[TMP101]], 168 +; POST-PROCESS-NEXT: [[TMP103:%.*]] = inttoptr i32 [[TMP102]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP104:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP103]], i32 0 +; POST-PROCESS-NEXT: [[TMP105:%.*]] = load i32, ptr addrspace(21) [[TMP104]], align 4 +; POST-PROCESS-NEXT: [[TMP106:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP107:%.*]] = add i32 [[TMP106]], -120 +; POST-PROCESS-NEXT: [[TMP108:%.*]] = add i32 [[TMP107]], 172 +; POST-PROCESS-NEXT: [[TMP109:%.*]] = inttoptr i32 [[TMP108]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP110:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP109]], i32 0 +; POST-PROCESS-NEXT: [[TMP111:%.*]] = load i32, ptr addrspace(21) [[TMP110]], align 4 +; POST-PROCESS-NEXT: [[TMP112:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP113:%.*]] = add i32 [[TMP112]], -120 +; POST-PROCESS-NEXT: [[TMP114:%.*]] = add i32 [[TMP113]], 176 +; POST-PROCESS-NEXT: [[TMP115:%.*]] = inttoptr i32 [[TMP114]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP116:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP115]], i32 0 +; POST-PROCESS-NEXT: [[TMP117:%.*]] = load i32, ptr addrspace(21) 
[[TMP116]], align 4 +; POST-PROCESS-NEXT: [[TMP118:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP119:%.*]] = add i32 [[TMP118]], -120 +; POST-PROCESS-NEXT: [[TMP120:%.*]] = add i32 [[TMP119]], 180 ; POST-PROCESS-NEXT: [[TMP121:%.*]] = inttoptr i32 [[TMP120]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP122:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP121]], i32 -30 -; POST-PROCESS-NEXT: [[TMP123:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP122]], i32 0, i32 0, i64 49 -; POST-PROCESS-NEXT: [[TMP124:%.*]] = load i32, ptr addrspace(21) [[TMP123]], align 4 -; POST-PROCESS-NEXT: [[TMP125:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP126:%.*]] = inttoptr i32 [[TMP125]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP127:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP126]], i32 -30 -; POST-PROCESS-NEXT: [[TMP128:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP127]], i32 0, i32 0, i64 50 +; POST-PROCESS-NEXT: [[TMP122:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP121]], i32 0 +; POST-PROCESS-NEXT: [[TMP123:%.*]] = load i32, ptr addrspace(21) [[TMP122]], align 4 +; POST-PROCESS-NEXT: [[TMP124:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP125:%.*]] = add i32 [[TMP124]], -120 +; POST-PROCESS-NEXT: [[TMP126:%.*]] = add i32 [[TMP125]], 184 +; POST-PROCESS-NEXT: [[TMP127:%.*]] = inttoptr i32 [[TMP126]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP128:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP127]], i32 0 ; POST-PROCESS-NEXT: [[TMP129:%.*]] = load i32, ptr addrspace(21) [[TMP128]], align 4 ; POST-PROCESS-NEXT: [[TMP130:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP131:%.*]] = inttoptr i32 [[TMP130]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP132:%.*]] = getelementptr i32, ptr 
addrspace(21) [[TMP131]], i32 -30 -; POST-PROCESS-NEXT: [[TMP133:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP132]], i32 0, i32 0, i64 51 -; POST-PROCESS-NEXT: [[TMP134:%.*]] = load i32, ptr addrspace(21) [[TMP133]], align 4 -; POST-PROCESS-NEXT: [[TMP135:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP136:%.*]] = inttoptr i32 [[TMP135]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP137:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP136]], i32 -30 -; POST-PROCESS-NEXT: [[TMP138:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP137]], i32 0, i32 0, i64 52 -; POST-PROCESS-NEXT: [[TMP139:%.*]] = load i32, ptr addrspace(21) [[TMP138]], align 4 -; POST-PROCESS-NEXT: [[TMP140:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP141:%.*]] = inttoptr i32 [[TMP140]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP142:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP141]], i32 -30 -; POST-PROCESS-NEXT: [[TMP143:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP142]], i32 0, i32 0, i64 53 -; POST-PROCESS-NEXT: [[TMP144:%.*]] = load i32, ptr addrspace(21) [[TMP143]], align 4 -; POST-PROCESS-NEXT: [[TMP145:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP146:%.*]] = inttoptr i32 [[TMP145]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP147:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP146]], i32 -30 -; POST-PROCESS-NEXT: [[TMP148:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP147]], i32 0, i32 0, i64 54 -; POST-PROCESS-NEXT: [[TMP149:%.*]] = load i32, ptr addrspace(21) [[TMP148]], align 4 -; POST-PROCESS-NEXT: [[TMP150:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP131:%.*]] = add i32 [[TMP130]], 
-120 +; POST-PROCESS-NEXT: [[TMP132:%.*]] = add i32 [[TMP131]], 188 +; POST-PROCESS-NEXT: [[TMP133:%.*]] = inttoptr i32 [[TMP132]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP134:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP133]], i32 0 +; POST-PROCESS-NEXT: [[TMP135:%.*]] = load i32, ptr addrspace(21) [[TMP134]], align 4 +; POST-PROCESS-NEXT: [[TMP136:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP137:%.*]] = add i32 [[TMP136]], -120 +; POST-PROCESS-NEXT: [[TMP138:%.*]] = add i32 [[TMP137]], 192 +; POST-PROCESS-NEXT: [[TMP139:%.*]] = inttoptr i32 [[TMP138]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP140:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP139]], i32 0 +; POST-PROCESS-NEXT: [[TMP141:%.*]] = load i32, ptr addrspace(21) [[TMP140]], align 4 +; POST-PROCESS-NEXT: [[TMP142:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP143:%.*]] = add i32 [[TMP142]], -120 +; POST-PROCESS-NEXT: [[TMP144:%.*]] = add i32 [[TMP143]], 196 +; POST-PROCESS-NEXT: [[TMP145:%.*]] = inttoptr i32 [[TMP144]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP146:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP145]], i32 0 +; POST-PROCESS-NEXT: [[TMP147:%.*]] = load i32, ptr addrspace(21) [[TMP146]], align 4 +; POST-PROCESS-NEXT: [[TMP148:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP149:%.*]] = add i32 [[TMP148]], -120 +; POST-PROCESS-NEXT: [[TMP150:%.*]] = add i32 [[TMP149]], 200 ; POST-PROCESS-NEXT: [[TMP151:%.*]] = inttoptr i32 [[TMP150]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP152:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP151]], i32 -30 -; POST-PROCESS-NEXT: [[TMP153:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP152]], i32 0, i32 0, i64 55 -; POST-PROCESS-NEXT: [[TMP154:%.*]] = load i32, ptr addrspace(21) [[TMP153]], align 4 -; POST-PROCESS-NEXT: [[TMP155:%.*]] = load i32, ptr 
addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP156:%.*]] = inttoptr i32 [[TMP155]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP157:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP156]], i32 -30 -; POST-PROCESS-NEXT: [[TMP158:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP157]], i32 0, i32 0, i64 56 +; POST-PROCESS-NEXT: [[TMP152:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP151]], i32 0 +; POST-PROCESS-NEXT: [[TMP153:%.*]] = load i32, ptr addrspace(21) [[TMP152]], align 4 +; POST-PROCESS-NEXT: [[TMP154:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP155:%.*]] = add i32 [[TMP154]], -120 +; POST-PROCESS-NEXT: [[TMP156:%.*]] = add i32 [[TMP155]], 204 +; POST-PROCESS-NEXT: [[TMP157:%.*]] = inttoptr i32 [[TMP156]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP158:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP157]], i32 0 ; POST-PROCESS-NEXT: [[TMP159:%.*]] = load i32, ptr addrspace(21) [[TMP158]], align 4 -; POST-PROCESS-NEXT: [[DOTFCA_0_EXTRACT1:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP1]], 0 +; POST-PROCESS-NEXT: [[TMP160:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP161:%.*]] = add i32 [[TMP160]], -120 +; POST-PROCESS-NEXT: [[TMP162:%.*]] = add i32 [[TMP161]], 208 +; POST-PROCESS-NEXT: [[TMP163:%.*]] = inttoptr i32 [[TMP162]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP164:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP163]], i32 0 +; POST-PROCESS-NEXT: [[TMP165:%.*]] = load i32, ptr addrspace(21) [[TMP164]], align 4 +; POST-PROCESS-NEXT: [[TMP166:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP167:%.*]] = add i32 [[TMP166]], -120 +; POST-PROCESS-NEXT: [[TMP168:%.*]] = add i32 [[TMP167]], 212 +; POST-PROCESS-NEXT: [[TMP169:%.*]] = inttoptr i32 [[TMP168]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP170:%.*]] = getelementptr i8, ptr addrspace(21) 
[[TMP169]], i32 0 +; POST-PROCESS-NEXT: [[TMP171:%.*]] = load i32, ptr addrspace(21) [[TMP170]], align 4 +; POST-PROCESS-NEXT: [[TMP172:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP173:%.*]] = add i32 [[TMP172]], -120 +; POST-PROCESS-NEXT: [[TMP174:%.*]] = add i32 [[TMP173]], 216 +; POST-PROCESS-NEXT: [[TMP175:%.*]] = inttoptr i32 [[TMP174]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP176:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP175]], i32 0 +; POST-PROCESS-NEXT: [[TMP177:%.*]] = load i32, ptr addrspace(21) [[TMP176]], align 4 +; POST-PROCESS-NEXT: [[TMP178:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP179:%.*]] = add i32 [[TMP178]], -120 +; POST-PROCESS-NEXT: [[TMP180:%.*]] = add i32 [[TMP179]], 220 +; POST-PROCESS-NEXT: [[TMP181:%.*]] = inttoptr i32 [[TMP180]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP182:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP181]], i32 0 +; POST-PROCESS-NEXT: [[TMP183:%.*]] = load i32, ptr addrspace(21) [[TMP182]], align 4 +; POST-PROCESS-NEXT: [[TMP184:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP185:%.*]] = add i32 [[TMP184]], -120 +; POST-PROCESS-NEXT: [[TMP186:%.*]] = add i32 [[TMP185]], 224 +; POST-PROCESS-NEXT: [[TMP187:%.*]] = inttoptr i32 [[TMP186]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP188:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP187]], i32 0 +; POST-PROCESS-NEXT: [[TMP189:%.*]] = load i32, ptr addrspace(21) [[TMP188]], align 4 +; POST-PROCESS-NEXT: [[DOTFCA_0_EXTRACT1:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 ; POST-PROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) -; POST-PROCESS-NEXT: [[TMP160:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: [[TMP161:%.*]] = add i32 [[TMP160]], -108 -; POST-PROCESS-NEXT: store i32 [[TMP161]], ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: call void @continuation.complete() +; POST-PROCESS-NEXT: ret void +; 
POST-PROCESS: entryresume.0.split: ; POST-PROCESS-NEXT: unreachable ; ; @@ -1244,150 +1270,177 @@ attributes #3 = { nounwind } ; POST-PROCESS-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 28) to ptr addrspace(20)), align 4 ; POST-PROCESS-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 29) to ptr addrspace(20)), align 4 ; POST-PROCESS-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP27:%.*]] = inttoptr i32 [[TMP26]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP27]], i32 -30 -; POST-PROCESS-NEXT: [[TMP29:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(21) [[TMP28]], i32 0, i32 0, i64 30 -; POST-PROCESS-NEXT: [[TMP30:%.*]] = load i32, ptr addrspace(21) [[TMP29]], align 4 -; POST-PROCESS-NEXT: [[TMP31:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP32:%.*]] = inttoptr i32 [[TMP31]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP33:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP32]], i32 -30 -; POST-PROCESS-NEXT: [[TMP34:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(21) [[TMP33]], i32 0, i32 0, i64 31 -; POST-PROCESS-NEXT: [[TMP35:%.*]] = load i32, ptr addrspace(21) [[TMP34]], align 4 -; POST-PROCESS-NEXT: [[TMP36:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP37:%.*]] = inttoptr i32 [[TMP36]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP38:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP37]], i32 -30 -; POST-PROCESS-NEXT: [[TMP39:%.*]] = getelementptr 
[[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(21) [[TMP38]], i32 0, i32 0, i64 32 -; POST-PROCESS-NEXT: [[TMP40:%.*]] = load i32, ptr addrspace(21) [[TMP39]], align 4 -; POST-PROCESS-NEXT: [[TMP41:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP42:%.*]] = inttoptr i32 [[TMP41]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP43:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP42]], i32 -30 -; POST-PROCESS-NEXT: [[TMP44:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(21) [[TMP43]], i32 0, i32 0, i64 33 -; POST-PROCESS-NEXT: [[TMP45:%.*]] = load i32, ptr addrspace(21) [[TMP44]], align 4 -; POST-PROCESS-NEXT: [[TMP46:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP27:%.*]] = add i32 [[TMP26]], -120 +; POST-PROCESS-NEXT: [[TMP28:%.*]] = add i32 [[TMP27]], 120 +; POST-PROCESS-NEXT: [[TMP29:%.*]] = inttoptr i32 [[TMP28]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP29]], i32 0 +; POST-PROCESS-NEXT: [[TMP31:%.*]] = load i32, ptr addrspace(21) [[TMP30]], align 4 +; POST-PROCESS-NEXT: [[TMP32:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP33:%.*]] = add i32 [[TMP32]], -120 +; POST-PROCESS-NEXT: [[TMP34:%.*]] = add i32 [[TMP33]], 124 +; POST-PROCESS-NEXT: [[TMP35:%.*]] = inttoptr i32 [[TMP34]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP36:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP35]], i32 0 +; POST-PROCESS-NEXT: [[TMP37:%.*]] = load i32, ptr addrspace(21) [[TMP36]], align 4 +; POST-PROCESS-NEXT: [[TMP38:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP39:%.*]] = add i32 [[TMP38]], -120 +; POST-PROCESS-NEXT: [[TMP40:%.*]] = add i32 [[TMP39]], 128 +; POST-PROCESS-NEXT: [[TMP41:%.*]] = inttoptr i32 [[TMP40]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP42:%.*]] = getelementptr i8, ptr addrspace(21) 
[[TMP41]], i32 0 +; POST-PROCESS-NEXT: [[TMP43:%.*]] = load i32, ptr addrspace(21) [[TMP42]], align 4 +; POST-PROCESS-NEXT: [[TMP44:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP45:%.*]] = add i32 [[TMP44]], -120 +; POST-PROCESS-NEXT: [[TMP46:%.*]] = add i32 [[TMP45]], 132 ; POST-PROCESS-NEXT: [[TMP47:%.*]] = inttoptr i32 [[TMP46]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP48:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP47]], i32 -30 -; POST-PROCESS-NEXT: [[TMP49:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(21) [[TMP48]], i32 0, i32 0, i64 34 -; POST-PROCESS-NEXT: [[TMP50:%.*]] = load i32, ptr addrspace(21) [[TMP49]], align 4 -; POST-PROCESS-NEXT: [[TMP51:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP52:%.*]] = inttoptr i32 [[TMP51]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP53:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP52]], i32 -30 -; POST-PROCESS-NEXT: [[TMP54:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(21) [[TMP53]], i32 0, i32 0, i64 35 +; POST-PROCESS-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP47]], i32 0 +; POST-PROCESS-NEXT: [[TMP49:%.*]] = load i32, ptr addrspace(21) [[TMP48]], align 4 +; POST-PROCESS-NEXT: [[TMP50:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP51:%.*]] = add i32 [[TMP50]], -120 +; POST-PROCESS-NEXT: [[TMP52:%.*]] = add i32 [[TMP51]], 136 +; POST-PROCESS-NEXT: [[TMP53:%.*]] = inttoptr i32 [[TMP52]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP54:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP53]], i32 0 ; POST-PROCESS-NEXT: [[TMP55:%.*]] = load i32, ptr addrspace(21) [[TMP54]], align 4 ; POST-PROCESS-NEXT: [[TMP56:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP57:%.*]] = inttoptr i32 [[TMP56]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP58:%.*]] = 
getelementptr i32, ptr addrspace(21) [[TMP57]], i32 -30 -; POST-PROCESS-NEXT: [[TMP59:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(21) [[TMP58]], i32 0, i32 0, i64 36 -; POST-PROCESS-NEXT: [[TMP60:%.*]] = load i32, ptr addrspace(21) [[TMP59]], align 4 -; POST-PROCESS-NEXT: [[TMP61:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP62:%.*]] = inttoptr i32 [[TMP61]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP63:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP62]], i32 -30 -; POST-PROCESS-NEXT: [[TMP64:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(21) [[TMP63]], i32 0, i32 0, i64 37 -; POST-PROCESS-NEXT: [[TMP65:%.*]] = load i32, ptr addrspace(21) [[TMP64]], align 4 -; POST-PROCESS-NEXT: [[TMP66:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP67:%.*]] = inttoptr i32 [[TMP66]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP68:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP67]], i32 -30 -; POST-PROCESS-NEXT: [[TMP69:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(21) [[TMP68]], i32 0, i32 0, i64 38 -; POST-PROCESS-NEXT: [[TMP70:%.*]] = load i32, ptr addrspace(21) [[TMP69]], align 4 -; POST-PROCESS-NEXT: [[TMP71:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP72:%.*]] = inttoptr i32 [[TMP71]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP73:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP72]], i32 -30 -; POST-PROCESS-NEXT: [[TMP74:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(21) [[TMP73]], i32 0, i32 0, i64 39 -; POST-PROCESS-NEXT: [[TMP75:%.*]] = load i32, ptr addrspace(21) [[TMP74]], align 4 -; POST-PROCESS-NEXT: [[TMP76:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP57:%.*]] = add i32 [[TMP56]], -120 +; POST-PROCESS-NEXT: 
[[TMP58:%.*]] = add i32 [[TMP57]], 140 +; POST-PROCESS-NEXT: [[TMP59:%.*]] = inttoptr i32 [[TMP58]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP60:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP59]], i32 0 +; POST-PROCESS-NEXT: [[TMP61:%.*]] = load i32, ptr addrspace(21) [[TMP60]], align 4 +; POST-PROCESS-NEXT: [[TMP62:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP63:%.*]] = add i32 [[TMP62]], -120 +; POST-PROCESS-NEXT: [[TMP64:%.*]] = add i32 [[TMP63]], 144 +; POST-PROCESS-NEXT: [[TMP65:%.*]] = inttoptr i32 [[TMP64]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP66:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP65]], i32 0 +; POST-PROCESS-NEXT: [[TMP67:%.*]] = load i32, ptr addrspace(21) [[TMP66]], align 4 +; POST-PROCESS-NEXT: [[TMP68:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP69:%.*]] = add i32 [[TMP68]], -120 +; POST-PROCESS-NEXT: [[TMP70:%.*]] = add i32 [[TMP69]], 148 +; POST-PROCESS-NEXT: [[TMP71:%.*]] = inttoptr i32 [[TMP70]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP72:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP71]], i32 0 +; POST-PROCESS-NEXT: [[TMP73:%.*]] = load i32, ptr addrspace(21) [[TMP72]], align 4 +; POST-PROCESS-NEXT: [[TMP74:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP75:%.*]] = add i32 [[TMP74]], -120 +; POST-PROCESS-NEXT: [[TMP76:%.*]] = add i32 [[TMP75]], 152 ; POST-PROCESS-NEXT: [[TMP77:%.*]] = inttoptr i32 [[TMP76]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP78:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP77]], i32 -30 -; POST-PROCESS-NEXT: [[TMP79:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(21) [[TMP78]], i32 0, i32 0, i64 40 -; POST-PROCESS-NEXT: [[TMP80:%.*]] = load i32, ptr addrspace(21) [[TMP79]], align 4 -; POST-PROCESS-NEXT: [[TMP81:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP82:%.*]] = inttoptr i32 
[[TMP81]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP83:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP82]], i32 -30 -; POST-PROCESS-NEXT: [[TMP84:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(21) [[TMP83]], i32 0, i32 0, i64 41 +; POST-PROCESS-NEXT: [[TMP78:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP77]], i32 0 +; POST-PROCESS-NEXT: [[TMP79:%.*]] = load i32, ptr addrspace(21) [[TMP78]], align 4 +; POST-PROCESS-NEXT: [[TMP80:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP81:%.*]] = add i32 [[TMP80]], -120 +; POST-PROCESS-NEXT: [[TMP82:%.*]] = add i32 [[TMP81]], 156 +; POST-PROCESS-NEXT: [[TMP83:%.*]] = inttoptr i32 [[TMP82]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP84:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP83]], i32 0 ; POST-PROCESS-NEXT: [[TMP85:%.*]] = load i32, ptr addrspace(21) [[TMP84]], align 4 ; POST-PROCESS-NEXT: [[TMP86:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP87:%.*]] = inttoptr i32 [[TMP86]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP88:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP87]], i32 -30 -; POST-PROCESS-NEXT: [[TMP89:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(21) [[TMP88]], i32 0, i32 0, i64 42 -; POST-PROCESS-NEXT: [[TMP90:%.*]] = load i32, ptr addrspace(21) [[TMP89]], align 4 -; POST-PROCESS-NEXT: [[TMP91:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP92:%.*]] = inttoptr i32 [[TMP91]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP93:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP92]], i32 -30 -; POST-PROCESS-NEXT: [[TMP94:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(21) [[TMP93]], i32 0, i32 0, i64 43 -; POST-PROCESS-NEXT: [[TMP95:%.*]] = load i32, ptr addrspace(21) [[TMP94]], align 4 -; POST-PROCESS-NEXT: [[TMP96:%.*]] = load i32, ptr 
addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP97:%.*]] = inttoptr i32 [[TMP96]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP98:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP97]], i32 -30 -; POST-PROCESS-NEXT: [[TMP99:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(21) [[TMP98]], i32 0, i32 0, i64 44 -; POST-PROCESS-NEXT: [[TMP100:%.*]] = load i32, ptr addrspace(21) [[TMP99]], align 4 -; POST-PROCESS-NEXT: [[TMP101:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP102:%.*]] = inttoptr i32 [[TMP101]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP103:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP102]], i32 -30 -; POST-PROCESS-NEXT: [[TMP104:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(21) [[TMP103]], i32 0, i32 0, i64 45 -; POST-PROCESS-NEXT: [[TMP105:%.*]] = load i32, ptr addrspace(21) [[TMP104]], align 4 -; POST-PROCESS-NEXT: [[TMP106:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP87:%.*]] = add i32 [[TMP86]], -120 +; POST-PROCESS-NEXT: [[TMP88:%.*]] = add i32 [[TMP87]], 160 +; POST-PROCESS-NEXT: [[TMP89:%.*]] = inttoptr i32 [[TMP88]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP90:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP89]], i32 0 +; POST-PROCESS-NEXT: [[TMP91:%.*]] = load i32, ptr addrspace(21) [[TMP90]], align 4 +; POST-PROCESS-NEXT: [[TMP92:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP93:%.*]] = add i32 [[TMP92]], -120 +; POST-PROCESS-NEXT: [[TMP94:%.*]] = add i32 [[TMP93]], 164 +; POST-PROCESS-NEXT: [[TMP95:%.*]] = inttoptr i32 [[TMP94]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP96:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP95]], i32 0 +; POST-PROCESS-NEXT: [[TMP97:%.*]] = load i32, ptr addrspace(21) [[TMP96]], align 4 +; POST-PROCESS-NEXT: [[TMP98:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; 
POST-PROCESS-NEXT: [[TMP99:%.*]] = add i32 [[TMP98]], -120 +; POST-PROCESS-NEXT: [[TMP100:%.*]] = add i32 [[TMP99]], 168 +; POST-PROCESS-NEXT: [[TMP101:%.*]] = inttoptr i32 [[TMP100]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP102:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP101]], i32 0 +; POST-PROCESS-NEXT: [[TMP103:%.*]] = load i32, ptr addrspace(21) [[TMP102]], align 4 +; POST-PROCESS-NEXT: [[TMP104:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP105:%.*]] = add i32 [[TMP104]], -120 +; POST-PROCESS-NEXT: [[TMP106:%.*]] = add i32 [[TMP105]], 172 ; POST-PROCESS-NEXT: [[TMP107:%.*]] = inttoptr i32 [[TMP106]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP108:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP107]], i32 -30 -; POST-PROCESS-NEXT: [[TMP109:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(21) [[TMP108]], i32 0, i32 0, i64 46 -; POST-PROCESS-NEXT: [[TMP110:%.*]] = load i32, ptr addrspace(21) [[TMP109]], align 4 -; POST-PROCESS-NEXT: [[TMP111:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP112:%.*]] = inttoptr i32 [[TMP111]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP113:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP112]], i32 -30 -; POST-PROCESS-NEXT: [[TMP114:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(21) [[TMP113]], i32 0, i32 0, i64 47 +; POST-PROCESS-NEXT: [[TMP108:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP107]], i32 0 +; POST-PROCESS-NEXT: [[TMP109:%.*]] = load i32, ptr addrspace(21) [[TMP108]], align 4 +; POST-PROCESS-NEXT: [[TMP110:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP111:%.*]] = add i32 [[TMP110]], -120 +; POST-PROCESS-NEXT: [[TMP112:%.*]] = add i32 [[TMP111]], 176 +; POST-PROCESS-NEXT: [[TMP113:%.*]] = inttoptr i32 [[TMP112]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP114:%.*]] = getelementptr i8, ptr 
addrspace(21) [[TMP113]], i32 0 ; POST-PROCESS-NEXT: [[TMP115:%.*]] = load i32, ptr addrspace(21) [[TMP114]], align 4 ; POST-PROCESS-NEXT: [[TMP116:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP117:%.*]] = inttoptr i32 [[TMP116]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP118:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP117]], i32 -30 -; POST-PROCESS-NEXT: [[TMP119:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(21) [[TMP118]], i32 0, i32 0, i64 48 -; POST-PROCESS-NEXT: [[TMP120:%.*]] = load i32, ptr addrspace(21) [[TMP119]], align 4 -; POST-PROCESS-NEXT: [[TMP121:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP122:%.*]] = inttoptr i32 [[TMP121]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP123:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP122]], i32 -30 -; POST-PROCESS-NEXT: [[TMP124:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(21) [[TMP123]], i32 0, i32 0, i64 49 -; POST-PROCESS-NEXT: [[TMP125:%.*]] = load i32, ptr addrspace(21) [[TMP124]], align 4 -; POST-PROCESS-NEXT: [[TMP126:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP127:%.*]] = inttoptr i32 [[TMP126]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP128:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP127]], i32 -30 -; POST-PROCESS-NEXT: [[TMP129:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(21) [[TMP128]], i32 0, i32 0, i64 50 -; POST-PROCESS-NEXT: [[TMP130:%.*]] = load i32, ptr addrspace(21) [[TMP129]], align 4 -; POST-PROCESS-NEXT: [[TMP131:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP132:%.*]] = inttoptr i32 [[TMP131]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP133:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP132]], i32 -30 -; POST-PROCESS-NEXT: [[TMP134:%.*]] = getelementptr 
[[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(21) [[TMP133]], i32 0, i32 0, i64 51 -; POST-PROCESS-NEXT: [[TMP135:%.*]] = load i32, ptr addrspace(21) [[TMP134]], align 4 -; POST-PROCESS-NEXT: [[TMP136:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP117:%.*]] = add i32 [[TMP116]], -120 +; POST-PROCESS-NEXT: [[TMP118:%.*]] = add i32 [[TMP117]], 180 +; POST-PROCESS-NEXT: [[TMP119:%.*]] = inttoptr i32 [[TMP118]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP120:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP119]], i32 0 +; POST-PROCESS-NEXT: [[TMP121:%.*]] = load i32, ptr addrspace(21) [[TMP120]], align 4 +; POST-PROCESS-NEXT: [[TMP122:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP123:%.*]] = add i32 [[TMP122]], -120 +; POST-PROCESS-NEXT: [[TMP124:%.*]] = add i32 [[TMP123]], 184 +; POST-PROCESS-NEXT: [[TMP125:%.*]] = inttoptr i32 [[TMP124]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP126:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP125]], i32 0 +; POST-PROCESS-NEXT: [[TMP127:%.*]] = load i32, ptr addrspace(21) [[TMP126]], align 4 +; POST-PROCESS-NEXT: [[TMP128:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP129:%.*]] = add i32 [[TMP128]], -120 +; POST-PROCESS-NEXT: [[TMP130:%.*]] = add i32 [[TMP129]], 188 +; POST-PROCESS-NEXT: [[TMP131:%.*]] = inttoptr i32 [[TMP130]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP132:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP131]], i32 0 +; POST-PROCESS-NEXT: [[TMP133:%.*]] = load i32, ptr addrspace(21) [[TMP132]], align 4 +; POST-PROCESS-NEXT: [[TMP134:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP135:%.*]] = add i32 [[TMP134]], -120 +; POST-PROCESS-NEXT: [[TMP136:%.*]] = add i32 [[TMP135]], 192 ; POST-PROCESS-NEXT: [[TMP137:%.*]] = inttoptr i32 [[TMP136]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP138:%.*]] = getelementptr i32, ptr 
addrspace(21) [[TMP137]], i32 -30 -; POST-PROCESS-NEXT: [[TMP139:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(21) [[TMP138]], i32 0, i32 0, i64 52 -; POST-PROCESS-NEXT: [[TMP140:%.*]] = load i32, ptr addrspace(21) [[TMP139]], align 4 -; POST-PROCESS-NEXT: [[TMP141:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP142:%.*]] = inttoptr i32 [[TMP141]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP143:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP142]], i32 -30 -; POST-PROCESS-NEXT: [[TMP144:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(21) [[TMP143]], i32 0, i32 0, i64 53 +; POST-PROCESS-NEXT: [[TMP138:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP137]], i32 0 +; POST-PROCESS-NEXT: [[TMP139:%.*]] = load i32, ptr addrspace(21) [[TMP138]], align 4 +; POST-PROCESS-NEXT: [[TMP140:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP141:%.*]] = add i32 [[TMP140]], -120 +; POST-PROCESS-NEXT: [[TMP142:%.*]] = add i32 [[TMP141]], 196 +; POST-PROCESS-NEXT: [[TMP143:%.*]] = inttoptr i32 [[TMP142]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP144:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP143]], i32 0 ; POST-PROCESS-NEXT: [[TMP145:%.*]] = load i32, ptr addrspace(21) [[TMP144]], align 4 ; POST-PROCESS-NEXT: [[TMP146:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP147:%.*]] = inttoptr i32 [[TMP146]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP148:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP147]], i32 -30 -; POST-PROCESS-NEXT: [[TMP149:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(21) [[TMP148]], i32 0, i32 0, i64 54 -; POST-PROCESS-NEXT: [[TMP150:%.*]] = load i32, ptr addrspace(21) [[TMP149]], align 4 -; POST-PROCESS-NEXT: [[TMP151:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: 
[[TMP152:%.*]] = inttoptr i32 [[TMP151]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP153:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP152]], i32 -30 -; POST-PROCESS-NEXT: [[TMP154:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(21) [[TMP153]], i32 0, i32 0, i64 55 -; POST-PROCESS-NEXT: [[TMP155:%.*]] = load i32, ptr addrspace(21) [[TMP154]], align 4 -; POST-PROCESS-NEXT: [[TMP156:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP157:%.*]] = inttoptr i32 [[TMP156]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP158:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP157]], i32 -30 -; POST-PROCESS-NEXT: [[TMP159:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(21) [[TMP158]], i32 0, i32 0, i64 56 -; POST-PROCESS-NEXT: [[TMP160:%.*]] = load i32, ptr addrspace(21) [[TMP159]], align 4 -; POST-PROCESS-NEXT: [[TMP161:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; POST-PROCESS-NEXT: [[ADDR_I:%.*]] = getelementptr [[STRUCT_SYSTEMDATA:%.*]], ptr [[TMP161]], i32 0, i32 1 +; POST-PROCESS-NEXT: [[TMP147:%.*]] = add i32 [[TMP146]], -120 +; POST-PROCESS-NEXT: [[TMP148:%.*]] = add i32 [[TMP147]], 200 +; POST-PROCESS-NEXT: [[TMP149:%.*]] = inttoptr i32 [[TMP148]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP150:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP149]], i32 0 +; POST-PROCESS-NEXT: [[TMP151:%.*]] = load i32, ptr addrspace(21) [[TMP150]], align 4 +; POST-PROCESS-NEXT: [[TMP152:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP153:%.*]] = add i32 [[TMP152]], -120 +; POST-PROCESS-NEXT: [[TMP154:%.*]] = add i32 [[TMP153]], 204 +; POST-PROCESS-NEXT: [[TMP155:%.*]] = inttoptr i32 [[TMP154]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP156:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP155]], i32 0 +; POST-PROCESS-NEXT: [[TMP157:%.*]] = load 
i32, ptr addrspace(21) [[TMP156]], align 4 +; POST-PROCESS-NEXT: [[TMP158:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP159:%.*]] = add i32 [[TMP158]], -120 +; POST-PROCESS-NEXT: [[TMP160:%.*]] = add i32 [[TMP159]], 208 +; POST-PROCESS-NEXT: [[TMP161:%.*]] = inttoptr i32 [[TMP160]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP162:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP161]], i32 0 +; POST-PROCESS-NEXT: [[TMP163:%.*]] = load i32, ptr addrspace(21) [[TMP162]], align 4 +; POST-PROCESS-NEXT: [[TMP164:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP165:%.*]] = add i32 [[TMP164]], -120 +; POST-PROCESS-NEXT: [[TMP166:%.*]] = add i32 [[TMP165]], 212 +; POST-PROCESS-NEXT: [[TMP167:%.*]] = inttoptr i32 [[TMP166]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP168:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP167]], i32 0 +; POST-PROCESS-NEXT: [[TMP169:%.*]] = load i32, ptr addrspace(21) [[TMP168]], align 4 +; POST-PROCESS-NEXT: [[TMP170:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP171:%.*]] = add i32 [[TMP170]], -120 +; POST-PROCESS-NEXT: [[TMP172:%.*]] = add i32 [[TMP171]], 216 +; POST-PROCESS-NEXT: [[TMP173:%.*]] = inttoptr i32 [[TMP172]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP174:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP173]], i32 0 +; POST-PROCESS-NEXT: [[TMP175:%.*]] = load i32, ptr addrspace(21) [[TMP174]], align 4 +; POST-PROCESS-NEXT: [[TMP176:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP177:%.*]] = add i32 [[TMP176]], -120 +; POST-PROCESS-NEXT: [[TMP178:%.*]] = add i32 [[TMP177]], 220 +; POST-PROCESS-NEXT: [[TMP179:%.*]] = inttoptr i32 [[TMP178]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP180:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP179]], i32 0 +; POST-PROCESS-NEXT: [[TMP181:%.*]] = load i32, ptr addrspace(21) [[TMP180]], align 4 +; POST-PROCESS-NEXT: [[TMP182:%.*]] = load 
i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP183:%.*]] = add i32 [[TMP182]], -120 +; POST-PROCESS-NEXT: [[TMP184:%.*]] = add i32 [[TMP183]], 224 +; POST-PROCESS-NEXT: [[TMP185:%.*]] = inttoptr i32 [[TMP184]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP186:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP185]], i32 0 +; POST-PROCESS-NEXT: [[TMP187:%.*]] = load i32, ptr addrspace(21) [[TMP186]], align 4 +; POST-PROCESS-NEXT: [[TMP188:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; POST-PROCESS-NEXT: [[ADDR_I:%.*]] = getelementptr [[STRUCT_SYSTEMDATA:%.*]], ptr [[TMP188]], i32 0, i32 1 ; POST-PROCESS-NEXT: [[VAL_I_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[ADDR_I]], i32 0, i32 0 ; POST-PROCESS-NEXT: [[VAL_I_FCA_0_LOAD:%.*]] = load <2 x float>, ptr [[VAL_I_FCA_0_GEP]], align 4 ; POST-PROCESS-NEXT: [[VAL_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[VAL_I_FCA_0_LOAD]], 0 ; POST-PROCESS-NEXT: [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL_I_FCA_0_INSERT]], 0 ; POST-PROCESS-NEXT: [[DOTSROA_011_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 0 -; POST-PROCESS-NEXT: [[TMP162:%.*]] = bitcast float [[DOTSROA_011_0_VEC_EXTRACT]] to i32 +; POST-PROCESS-NEXT: [[TMP189:%.*]] = bitcast float [[DOTSROA_011_0_VEC_EXTRACT]] to i32 ; POST-PROCESS-NEXT: [[DOTSROA_011_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 1 -; POST-PROCESS-NEXT: [[TMP163:%.*]] = bitcast float [[DOTSROA_011_4_VEC_EXTRACT]] to i32 +; POST-PROCESS-NEXT: [[TMP190:%.*]] = bitcast float [[DOTSROA_011_4_VEC_EXTRACT]] to i32 ; POST-PROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP1]], 0 ; POST-PROCESS-NEXT: 
call void @amd.dx.setLocalRootIndex(i32 5) ; POST-PROCESS-NEXT: call void @_cont_AcceptHit(ptr [[SYSTEM_DATA_ALLOCA]]) @@ -1414,152 +1467,179 @@ attributes #3 = { nounwind } ; POST-PROCESS-NEXT: store i32 [[TMP23]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 27) to ptr addrspace(20)), align 4 ; POST-PROCESS-NEXT: store i32 [[TMP24]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 28) to ptr addrspace(20)), align 4 ; POST-PROCESS-NEXT: store i32 [[TMP25]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 29) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP164:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP165:%.*]] = inttoptr i32 [[TMP164]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP166:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP165]], i32 -30 -; POST-PROCESS-NEXT: [[TMP167:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspace(21) [[TMP166]], i32 0, i32 0, i64 30 -; POST-PROCESS-NEXT: store i32 [[TMP30]], ptr addrspace(21) [[TMP167]], align 4 -; POST-PROCESS-NEXT: [[TMP168:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP169:%.*]] = inttoptr i32 [[TMP168]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP170:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP169]], i32 -30 -; POST-PROCESS-NEXT: [[TMP171:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr 
addrspace(21) [[TMP170]], i32 0, i32 0, i64 31 -; POST-PROCESS-NEXT: store i32 [[TMP35]], ptr addrspace(21) [[TMP171]], align 4 -; POST-PROCESS-NEXT: [[TMP172:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP173:%.*]] = inttoptr i32 [[TMP172]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP174:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP173]], i32 -30 -; POST-PROCESS-NEXT: [[TMP175:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspace(21) [[TMP174]], i32 0, i32 0, i64 32 -; POST-PROCESS-NEXT: store i32 [[TMP40]], ptr addrspace(21) [[TMP175]], align 4 -; POST-PROCESS-NEXT: [[TMP176:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP177:%.*]] = inttoptr i32 [[TMP176]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP178:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP177]], i32 -30 -; POST-PROCESS-NEXT: [[TMP179:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspace(21) [[TMP178]], i32 0, i32 0, i64 33 -; POST-PROCESS-NEXT: store i32 [[TMP45]], ptr addrspace(21) [[TMP179]], align 4 -; POST-PROCESS-NEXT: [[TMP180:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP181:%.*]] = inttoptr i32 [[TMP180]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP182:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP181]], i32 -30 -; POST-PROCESS-NEXT: [[TMP183:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspace(21) [[TMP182]], i32 0, i32 0, i64 34 -; POST-PROCESS-NEXT: store i32 [[TMP50]], ptr addrspace(21) [[TMP183]], align 4 -; POST-PROCESS-NEXT: [[TMP184:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP185:%.*]] = inttoptr i32 [[TMP184]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP186:%.*]] = getelementptr i32, ptr addrspace(21) 
[[TMP185]], i32 -30 -; POST-PROCESS-NEXT: [[TMP187:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspace(21) [[TMP186]], i32 0, i32 0, i64 35 -; POST-PROCESS-NEXT: store i32 [[TMP55]], ptr addrspace(21) [[TMP187]], align 4 -; POST-PROCESS-NEXT: [[TMP188:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP189:%.*]] = inttoptr i32 [[TMP188]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP190:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP189]], i32 -30 -; POST-PROCESS-NEXT: [[TMP191:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspace(21) [[TMP190]], i32 0, i32 0, i64 36 -; POST-PROCESS-NEXT: store i32 [[TMP60]], ptr addrspace(21) [[TMP191]], align 4 -; POST-PROCESS-NEXT: [[TMP192:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP193:%.*]] = inttoptr i32 [[TMP192]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP194:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP193]], i32 -30 -; POST-PROCESS-NEXT: [[TMP195:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspace(21) [[TMP194]], i32 0, i32 0, i64 37 -; POST-PROCESS-NEXT: store i32 [[TMP65]], ptr addrspace(21) [[TMP195]], align 4 +; POST-PROCESS-NEXT: [[TMP191:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP192:%.*]] = add i32 [[TMP191]], -120 +; POST-PROCESS-NEXT: [[TMP193:%.*]] = add i32 [[TMP192]], 120 +; POST-PROCESS-NEXT: [[TMP194:%.*]] = inttoptr i32 [[TMP193]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP195:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP194]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP31]], ptr addrspace(21) [[TMP195]], align 4 ; POST-PROCESS-NEXT: [[TMP196:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP197:%.*]] = inttoptr i32 [[TMP196]] to ptr 
addrspace(21) -; POST-PROCESS-NEXT: [[TMP198:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP197]], i32 -30 -; POST-PROCESS-NEXT: [[TMP199:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspace(21) [[TMP198]], i32 0, i32 0, i64 38 -; POST-PROCESS-NEXT: store i32 [[TMP70]], ptr addrspace(21) [[TMP199]], align 4 -; POST-PROCESS-NEXT: [[TMP200:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP201:%.*]] = inttoptr i32 [[TMP200]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP202:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP201]], i32 -30 -; POST-PROCESS-NEXT: [[TMP203:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspace(21) [[TMP202]], i32 0, i32 0, i64 39 -; POST-PROCESS-NEXT: store i32 [[TMP75]], ptr addrspace(21) [[TMP203]], align 4 -; POST-PROCESS-NEXT: [[TMP204:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP205:%.*]] = inttoptr i32 [[TMP204]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP206:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP205]], i32 -30 -; POST-PROCESS-NEXT: [[TMP207:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspace(21) [[TMP206]], i32 0, i32 0, i64 40 -; POST-PROCESS-NEXT: store i32 [[TMP80]], ptr addrspace(21) [[TMP207]], align 4 -; POST-PROCESS-NEXT: [[TMP208:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP197:%.*]] = add i32 [[TMP196]], -120 +; POST-PROCESS-NEXT: [[TMP198:%.*]] = add i32 [[TMP197]], 124 +; POST-PROCESS-NEXT: [[TMP199:%.*]] = inttoptr i32 [[TMP198]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP200:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP199]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP37]], ptr addrspace(21) [[TMP200]], align 4 +; POST-PROCESS-NEXT: [[TMP201:%.*]] = load i32, ptr addrspace(20) 
@REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP202:%.*]] = add i32 [[TMP201]], -120 +; POST-PROCESS-NEXT: [[TMP203:%.*]] = add i32 [[TMP202]], 128 +; POST-PROCESS-NEXT: [[TMP204:%.*]] = inttoptr i32 [[TMP203]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP205:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP204]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP43]], ptr addrspace(21) [[TMP205]], align 4 +; POST-PROCESS-NEXT: [[TMP206:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP207:%.*]] = add i32 [[TMP206]], -120 +; POST-PROCESS-NEXT: [[TMP208:%.*]] = add i32 [[TMP207]], 132 ; POST-PROCESS-NEXT: [[TMP209:%.*]] = inttoptr i32 [[TMP208]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP210:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP209]], i32 -30 -; POST-PROCESS-NEXT: [[TMP211:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspace(21) [[TMP210]], i32 0, i32 0, i64 41 -; POST-PROCESS-NEXT: store i32 [[TMP85]], ptr addrspace(21) [[TMP211]], align 4 -; POST-PROCESS-NEXT: [[TMP212:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP213:%.*]] = inttoptr i32 [[TMP212]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP214:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP213]], i32 -30 -; POST-PROCESS-NEXT: [[TMP215:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspace(21) [[TMP214]], i32 0, i32 0, i64 42 -; POST-PROCESS-NEXT: store i32 [[TMP90]], ptr addrspace(21) [[TMP215]], align 4 +; POST-PROCESS-NEXT: [[TMP210:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP209]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP49]], ptr addrspace(21) [[TMP210]], align 4 +; POST-PROCESS-NEXT: [[TMP211:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP212:%.*]] = add i32 [[TMP211]], -120 +; POST-PROCESS-NEXT: [[TMP213:%.*]] = add i32 [[TMP212]], 136 +; 
POST-PROCESS-NEXT: [[TMP214:%.*]] = inttoptr i32 [[TMP213]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP215:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP214]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP55]], ptr addrspace(21) [[TMP215]], align 4 ; POST-PROCESS-NEXT: [[TMP216:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP217:%.*]] = inttoptr i32 [[TMP216]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP218:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP217]], i32 -30 -; POST-PROCESS-NEXT: [[TMP219:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspace(21) [[TMP218]], i32 0, i32 0, i64 43 -; POST-PROCESS-NEXT: store i32 [[TMP95]], ptr addrspace(21) [[TMP219]], align 4 -; POST-PROCESS-NEXT: [[TMP220:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP221:%.*]] = inttoptr i32 [[TMP220]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP222:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP221]], i32 -30 -; POST-PROCESS-NEXT: [[TMP223:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspace(21) [[TMP222]], i32 0, i32 0, i64 44 -; POST-PROCESS-NEXT: store i32 [[TMP100]], ptr addrspace(21) [[TMP223]], align 4 -; POST-PROCESS-NEXT: [[TMP224:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP225:%.*]] = inttoptr i32 [[TMP224]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP226:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP225]], i32 -30 -; POST-PROCESS-NEXT: [[TMP227:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspace(21) [[TMP226]], i32 0, i32 0, i64 45 -; POST-PROCESS-NEXT: store i32 [[TMP105]], ptr addrspace(21) [[TMP227]], align 4 -; POST-PROCESS-NEXT: [[TMP228:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP217:%.*]] = add 
i32 [[TMP216]], -120 +; POST-PROCESS-NEXT: [[TMP218:%.*]] = add i32 [[TMP217]], 140 +; POST-PROCESS-NEXT: [[TMP219:%.*]] = inttoptr i32 [[TMP218]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP220:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP219]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP61]], ptr addrspace(21) [[TMP220]], align 4 +; POST-PROCESS-NEXT: [[TMP221:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP222:%.*]] = add i32 [[TMP221]], -120 +; POST-PROCESS-NEXT: [[TMP223:%.*]] = add i32 [[TMP222]], 144 +; POST-PROCESS-NEXT: [[TMP224:%.*]] = inttoptr i32 [[TMP223]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP225:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP224]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP67]], ptr addrspace(21) [[TMP225]], align 4 +; POST-PROCESS-NEXT: [[TMP226:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP227:%.*]] = add i32 [[TMP226]], -120 +; POST-PROCESS-NEXT: [[TMP228:%.*]] = add i32 [[TMP227]], 148 ; POST-PROCESS-NEXT: [[TMP229:%.*]] = inttoptr i32 [[TMP228]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP230:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP229]], i32 -30 -; POST-PROCESS-NEXT: [[TMP231:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspace(21) [[TMP230]], i32 0, i32 0, i64 46 -; POST-PROCESS-NEXT: store i32 [[TMP110]], ptr addrspace(21) [[TMP231]], align 4 -; POST-PROCESS-NEXT: [[TMP232:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP233:%.*]] = inttoptr i32 [[TMP232]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP234:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP233]], i32 -30 -; POST-PROCESS-NEXT: [[TMP235:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspace(21) [[TMP234]], i32 0, i32 0, i64 47 -; POST-PROCESS-NEXT: store i32 [[TMP115]], ptr addrspace(21) 
[[TMP235]], align 4 +; POST-PROCESS-NEXT: [[TMP230:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP229]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP73]], ptr addrspace(21) [[TMP230]], align 4 +; POST-PROCESS-NEXT: [[TMP231:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP232:%.*]] = add i32 [[TMP231]], -120 +; POST-PROCESS-NEXT: [[TMP233:%.*]] = add i32 [[TMP232]], 152 +; POST-PROCESS-NEXT: [[TMP234:%.*]] = inttoptr i32 [[TMP233]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP235:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP234]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP79]], ptr addrspace(21) [[TMP235]], align 4 ; POST-PROCESS-NEXT: [[TMP236:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP237:%.*]] = inttoptr i32 [[TMP236]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP238:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP237]], i32 -30 -; POST-PROCESS-NEXT: [[TMP239:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspace(21) [[TMP238]], i32 0, i32 0, i64 48 -; POST-PROCESS-NEXT: store i32 [[TMP120]], ptr addrspace(21) [[TMP239]], align 4 -; POST-PROCESS-NEXT: [[TMP240:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP241:%.*]] = inttoptr i32 [[TMP240]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP242:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP241]], i32 -30 -; POST-PROCESS-NEXT: [[TMP243:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspace(21) [[TMP242]], i32 0, i32 0, i64 49 -; POST-PROCESS-NEXT: store i32 [[TMP125]], ptr addrspace(21) [[TMP243]], align 4 -; POST-PROCESS-NEXT: [[TMP244:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP245:%.*]] = inttoptr i32 [[TMP244]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP246:%.*]] = getelementptr i32, ptr addrspace(21) 
[[TMP245]], i32 -30 -; POST-PROCESS-NEXT: [[TMP247:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspace(21) [[TMP246]], i32 0, i32 0, i64 50 -; POST-PROCESS-NEXT: store i32 [[TMP130]], ptr addrspace(21) [[TMP247]], align 4 -; POST-PROCESS-NEXT: [[TMP248:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP237:%.*]] = add i32 [[TMP236]], -120 +; POST-PROCESS-NEXT: [[TMP238:%.*]] = add i32 [[TMP237]], 156 +; POST-PROCESS-NEXT: [[TMP239:%.*]] = inttoptr i32 [[TMP238]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP240:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP239]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP85]], ptr addrspace(21) [[TMP240]], align 4 +; POST-PROCESS-NEXT: [[TMP241:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP242:%.*]] = add i32 [[TMP241]], -120 +; POST-PROCESS-NEXT: [[TMP243:%.*]] = add i32 [[TMP242]], 160 +; POST-PROCESS-NEXT: [[TMP244:%.*]] = inttoptr i32 [[TMP243]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP245:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP244]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP91]], ptr addrspace(21) [[TMP245]], align 4 +; POST-PROCESS-NEXT: [[TMP246:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP247:%.*]] = add i32 [[TMP246]], -120 +; POST-PROCESS-NEXT: [[TMP248:%.*]] = add i32 [[TMP247]], 164 ; POST-PROCESS-NEXT: [[TMP249:%.*]] = inttoptr i32 [[TMP248]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP250:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP249]], i32 -30 -; POST-PROCESS-NEXT: [[TMP251:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspace(21) [[TMP250]], i32 0, i32 0, i64 51 -; POST-PROCESS-NEXT: store i32 [[TMP135]], ptr addrspace(21) [[TMP251]], align 4 -; POST-PROCESS-NEXT: [[TMP252:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; 
POST-PROCESS-NEXT: [[TMP253:%.*]] = inttoptr i32 [[TMP252]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP254:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP253]], i32 -30 -; POST-PROCESS-NEXT: [[TMP255:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspace(21) [[TMP254]], i32 0, i32 0, i64 52 -; POST-PROCESS-NEXT: store i32 [[TMP140]], ptr addrspace(21) [[TMP255]], align 4 +; POST-PROCESS-NEXT: [[TMP250:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP249]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP97]], ptr addrspace(21) [[TMP250]], align 4 +; POST-PROCESS-NEXT: [[TMP251:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP252:%.*]] = add i32 [[TMP251]], -120 +; POST-PROCESS-NEXT: [[TMP253:%.*]] = add i32 [[TMP252]], 168 +; POST-PROCESS-NEXT: [[TMP254:%.*]] = inttoptr i32 [[TMP253]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP255:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP254]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP103]], ptr addrspace(21) [[TMP255]], align 4 ; POST-PROCESS-NEXT: [[TMP256:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP257:%.*]] = inttoptr i32 [[TMP256]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP258:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP257]], i32 -30 -; POST-PROCESS-NEXT: [[TMP259:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspace(21) [[TMP258]], i32 0, i32 0, i64 53 -; POST-PROCESS-NEXT: store i32 [[TMP145]], ptr addrspace(21) [[TMP259]], align 4 -; POST-PROCESS-NEXT: [[TMP260:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP261:%.*]] = inttoptr i32 [[TMP260]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP262:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP261]], i32 -30 -; POST-PROCESS-NEXT: [[TMP263:%.*]] = getelementptr 
[[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspace(21) [[TMP262]], i32 0, i32 0, i64 54 -; POST-PROCESS-NEXT: store i32 [[TMP150]], ptr addrspace(21) [[TMP263]], align 4 -; POST-PROCESS-NEXT: [[TMP264:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP265:%.*]] = inttoptr i32 [[TMP264]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP266:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP265]], i32 -30 -; POST-PROCESS-NEXT: [[TMP267:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspace(21) [[TMP266]], i32 0, i32 0, i64 55 -; POST-PROCESS-NEXT: store i32 [[TMP155]], ptr addrspace(21) [[TMP267]], align 4 -; POST-PROCESS-NEXT: [[TMP268:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP257:%.*]] = add i32 [[TMP256]], -120 +; POST-PROCESS-NEXT: [[TMP258:%.*]] = add i32 [[TMP257]], 172 +; POST-PROCESS-NEXT: [[TMP259:%.*]] = inttoptr i32 [[TMP258]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP260:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP259]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP109]], ptr addrspace(21) [[TMP260]], align 4 +; POST-PROCESS-NEXT: [[TMP261:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP262:%.*]] = add i32 [[TMP261]], -120 +; POST-PROCESS-NEXT: [[TMP263:%.*]] = add i32 [[TMP262]], 176 +; POST-PROCESS-NEXT: [[TMP264:%.*]] = inttoptr i32 [[TMP263]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP265:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP264]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP115]], ptr addrspace(21) [[TMP265]], align 4 +; POST-PROCESS-NEXT: [[TMP266:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP267:%.*]] = add i32 [[TMP266]], -120 +; POST-PROCESS-NEXT: [[TMP268:%.*]] = add i32 [[TMP267]], 180 ; POST-PROCESS-NEXT: [[TMP269:%.*]] = inttoptr i32 [[TMP268]] to ptr 
addrspace(21) -; POST-PROCESS-NEXT: [[TMP270:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP269]], i32 -30 -; POST-PROCESS-NEXT: [[TMP271:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspace(21) [[TMP270]], i32 0, i32 0, i64 56 -; POST-PROCESS-NEXT: store i32 [[TMP160]], ptr addrspace(21) [[TMP271]], align 4 +; POST-PROCESS-NEXT: [[TMP270:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP269]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP121]], ptr addrspace(21) [[TMP270]], align 4 +; POST-PROCESS-NEXT: [[TMP271:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP272:%.*]] = add i32 [[TMP271]], -120 +; POST-PROCESS-NEXT: [[TMP273:%.*]] = add i32 [[TMP272]], 184 +; POST-PROCESS-NEXT: [[TMP274:%.*]] = inttoptr i32 [[TMP273]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP275:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP274]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP127]], ptr addrspace(21) [[TMP275]], align 4 +; POST-PROCESS-NEXT: [[TMP276:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP277:%.*]] = add i32 [[TMP276]], -120 +; POST-PROCESS-NEXT: [[TMP278:%.*]] = add i32 [[TMP277]], 188 +; POST-PROCESS-NEXT: [[TMP279:%.*]] = inttoptr i32 [[TMP278]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP280:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP279]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP133]], ptr addrspace(21) [[TMP280]], align 4 +; POST-PROCESS-NEXT: [[TMP281:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP282:%.*]] = add i32 [[TMP281]], -120 +; POST-PROCESS-NEXT: [[TMP283:%.*]] = add i32 [[TMP282]], 192 +; POST-PROCESS-NEXT: [[TMP284:%.*]] = inttoptr i32 [[TMP283]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP285:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP284]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP139]], ptr addrspace(21) [[TMP285]], align 4 +; POST-PROCESS-NEXT: 
[[TMP286:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP287:%.*]] = add i32 [[TMP286]], -120 +; POST-PROCESS-NEXT: [[TMP288:%.*]] = add i32 [[TMP287]], 196 +; POST-PROCESS-NEXT: [[TMP289:%.*]] = inttoptr i32 [[TMP288]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP290:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP289]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP145]], ptr addrspace(21) [[TMP290]], align 4 +; POST-PROCESS-NEXT: [[TMP291:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP292:%.*]] = add i32 [[TMP291]], -120 +; POST-PROCESS-NEXT: [[TMP293:%.*]] = add i32 [[TMP292]], 200 +; POST-PROCESS-NEXT: [[TMP294:%.*]] = inttoptr i32 [[TMP293]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP295:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP294]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP151]], ptr addrspace(21) [[TMP295]], align 4 +; POST-PROCESS-NEXT: [[TMP296:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP297:%.*]] = add i32 [[TMP296]], -120 +; POST-PROCESS-NEXT: [[TMP298:%.*]] = add i32 [[TMP297]], 204 +; POST-PROCESS-NEXT: [[TMP299:%.*]] = inttoptr i32 [[TMP298]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP300:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP299]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP157]], ptr addrspace(21) [[TMP300]], align 4 +; POST-PROCESS-NEXT: [[TMP301:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP302:%.*]] = add i32 [[TMP301]], -120 +; POST-PROCESS-NEXT: [[TMP303:%.*]] = add i32 [[TMP302]], 208 +; POST-PROCESS-NEXT: [[TMP304:%.*]] = inttoptr i32 [[TMP303]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP305:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP304]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP163]], ptr addrspace(21) [[TMP305]], align 4 +; POST-PROCESS-NEXT: [[TMP306:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP307:%.*]] = add 
i32 [[TMP306]], -120 +; POST-PROCESS-NEXT: [[TMP308:%.*]] = add i32 [[TMP307]], 212 +; POST-PROCESS-NEXT: [[TMP309:%.*]] = inttoptr i32 [[TMP308]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP310:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP309]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP169]], ptr addrspace(21) [[TMP310]], align 4 +; POST-PROCESS-NEXT: [[TMP311:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP312:%.*]] = add i32 [[TMP311]], -120 +; POST-PROCESS-NEXT: [[TMP313:%.*]] = add i32 [[TMP312]], 216 +; POST-PROCESS-NEXT: [[TMP314:%.*]] = inttoptr i32 [[TMP313]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP315:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP314]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP175]], ptr addrspace(21) [[TMP315]], align 4 +; POST-PROCESS-NEXT: [[TMP316:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP317:%.*]] = add i32 [[TMP316]], -120 +; POST-PROCESS-NEXT: [[TMP318:%.*]] = add i32 [[TMP317]], 220 +; POST-PROCESS-NEXT: [[TMP319:%.*]] = inttoptr i32 [[TMP318]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP320:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP319]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP181]], ptr addrspace(21) [[TMP320]], align 4 +; POST-PROCESS-NEXT: [[TMP321:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP322:%.*]] = add i32 [[TMP321]], -120 +; POST-PROCESS-NEXT: [[TMP323:%.*]] = add i32 [[TMP322]], 224 +; POST-PROCESS-NEXT: [[TMP324:%.*]] = inttoptr i32 [[TMP323]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP325:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP324]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP187]], ptr addrspace(21) [[TMP325]], align 4 ; POST-PROCESS-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 -; POST-PROCESS-NEXT: [[TMP272:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT]] to i32 -; 
POST-PROCESS-NEXT: [[TMP273:%.*]] = bitcast i32 [[TMP272]] to float -; POST-PROCESS-NEXT: [[DOTSROA_012_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP273]], i32 0 +; POST-PROCESS-NEXT: [[TMP326:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT]] to i32 +; POST-PROCESS-NEXT: [[TMP327:%.*]] = bitcast i32 [[TMP326]] to float +; POST-PROCESS-NEXT: [[DOTSROA_012_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP327]], i32 0 ; POST-PROCESS-NEXT: [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 -; POST-PROCESS-NEXT: [[TMP274:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT]] to i32 -; POST-PROCESS-NEXT: [[TMP275:%.*]] = bitcast i32 [[TMP274]] to float -; POST-PROCESS-NEXT: [[DOTSROA_012_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_012_0_VEC_INSERT]], float [[TMP275]], i32 1 +; POST-PROCESS-NEXT: [[TMP328:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT]] to i32 +; POST-PROCESS-NEXT: [[TMP329:%.*]] = bitcast i32 [[TMP328]] to float +; POST-PROCESS-NEXT: [[DOTSROA_012_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_012_0_VEC_INSERT]], float [[TMP329]], i32 1 ; POST-PROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_012_4_VEC_INSERT]], 0 -; POST-PROCESS-NEXT: [[TMP276:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; POST-PROCESS-NEXT: call void @_cont_SetTriangleHitAttributes(ptr [[TMP276]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT]]) +; POST-PROCESS-NEXT: [[TMP330:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; POST-PROCESS-NEXT: call void @_cont_SetTriangleHitAttributes(ptr [[TMP330]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT]]) ; POST-PROCESS-NEXT: [[DOTFCA_0_0_0_0_GEP1:%.*]] = 
getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 ; POST-PROCESS-NEXT: [[DOTFCA_0_0_0_0_LOAD:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP1]], align 4 ; POST-PROCESS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD]], 0, 0, 0, 0 @@ -1590,8 +1670,8 @@ attributes #3 = { nounwind } ; POST-PROCESS-NEXT: [[DOTFCA_1_1_GEP10:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 ; POST-PROCESS-NEXT: [[DOTFCA_1_1_LOAD:%.*]] = load i32, ptr [[DOTFCA_1_1_GEP10]], align 4 ; POST-PROCESS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], i32 [[DOTFCA_1_1_LOAD]], 1, 1 -; POST-PROCESS-NEXT: [[TMP277:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP277]], [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]]), !continuation.registercount [[META17]] +; POST-PROCESS-NEXT: [[TMP331:%.*]] = load i32, ptr [[CSP]], align 4 +; POST-PROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR]], i32 [[TMP331]], [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]]), !continuation.registercount [[META17]] ; POST-PROCESS-NEXT: unreachable ; ; @@ -1601,15 +1681,13 @@ attributes #3 = { nounwind } ; POST-PROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; POST-PROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; POST-PROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: [[TMP2:%.*]] = inttoptr i32 [[TMP1]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP2]], i64 108 -; POST-PROCESS-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[CLOSESTHIT_FRAME:%.*]], ptr addrspace(21) [[TMP3]], i32 0, i32 0 -; POST-PROCESS-NEXT: store i64 [[RETURNADDR]], ptr addrspace(21) [[RETURNADDR_SPILL_ADDR]], align 4 +; POST-PROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 0 +; POST-PROCESS-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 108 +; POST-PROCESS-NEXT: [[TMP4:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP4]], i32 0 +; POST-PROCESS-NEXT: store i64 [[RETURNADDR]], ptr addrspace(21) [[TMP5]], align 4 ; POST-PROCESS-NEXT: [[DOTFCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 0, 0 ; POST-PROCESS-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 1, 0 -; POST-PROCESS-NEXT: [[TMP4:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], 108 -; POST-PROCESS-NEXT: store i32 [[TMP5]], ptr [[CSP]], align 4 ; POST-PROCESS-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 ; POST-PROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr 
getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 ; POST-PROCESS-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 @@ -1634,161 +1712,188 @@ attributes #3 = { nounwind } ; POST-PROCESS-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 28) to ptr addrspace(20)), align 4 ; POST-PROCESS-NEXT: [[TMP28:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 29) to ptr addrspace(20)), align 4 ; POST-PROCESS-NEXT: [[TMP29:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP30:%.*]] = inttoptr i32 [[TMP29]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP31:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP30]], i32 -30 -; POST-PROCESS-NEXT: [[TMP32:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(21) [[TMP31]], i32 0, i32 0, i64 30 -; POST-PROCESS-NEXT: [[TMP33:%.*]] = load i32, ptr addrspace(21) [[TMP32]], align 4 -; POST-PROCESS-NEXT: [[TMP34:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP35:%.*]] = inttoptr i32 [[TMP34]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP36:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP35]], i32 -30 -; POST-PROCESS-NEXT: [[TMP37:%.*]] = getelementptr 
[[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(21) [[TMP36]], i32 0, i32 0, i64 31 -; POST-PROCESS-NEXT: [[TMP38:%.*]] = load i32, ptr addrspace(21) [[TMP37]], align 4 -; POST-PROCESS-NEXT: [[TMP39:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP40:%.*]] = inttoptr i32 [[TMP39]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP41:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP40]], i32 -30 -; POST-PROCESS-NEXT: [[TMP42:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(21) [[TMP41]], i32 0, i32 0, i64 32 -; POST-PROCESS-NEXT: [[TMP43:%.*]] = load i32, ptr addrspace(21) [[TMP42]], align 4 -; POST-PROCESS-NEXT: [[TMP44:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP45:%.*]] = inttoptr i32 [[TMP44]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP46:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP45]], i32 -30 -; POST-PROCESS-NEXT: [[TMP47:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(21) [[TMP46]], i32 0, i32 0, i64 33 -; POST-PROCESS-NEXT: [[TMP48:%.*]] = load i32, ptr addrspace(21) [[TMP47]], align 4 -; POST-PROCESS-NEXT: [[TMP49:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP30:%.*]] = add i32 [[TMP29]], -120 +; POST-PROCESS-NEXT: [[TMP31:%.*]] = add i32 [[TMP30]], 120 +; POST-PROCESS-NEXT: [[TMP32:%.*]] = inttoptr i32 [[TMP31]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP32]], i32 0 +; POST-PROCESS-NEXT: [[TMP34:%.*]] = load i32, ptr addrspace(21) [[TMP33]], align 4 +; POST-PROCESS-NEXT: [[TMP35:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP36:%.*]] = add i32 [[TMP35]], -120 +; POST-PROCESS-NEXT: [[TMP37:%.*]] = add i32 [[TMP36]], 124 +; POST-PROCESS-NEXT: [[TMP38:%.*]] = 
inttoptr i32 [[TMP37]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP39:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP38]], i32 0 +; POST-PROCESS-NEXT: [[TMP40:%.*]] = load i32, ptr addrspace(21) [[TMP39]], align 4 +; POST-PROCESS-NEXT: [[TMP41:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP42:%.*]] = add i32 [[TMP41]], -120 +; POST-PROCESS-NEXT: [[TMP43:%.*]] = add i32 [[TMP42]], 128 +; POST-PROCESS-NEXT: [[TMP44:%.*]] = inttoptr i32 [[TMP43]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP45:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP44]], i32 0 +; POST-PROCESS-NEXT: [[TMP46:%.*]] = load i32, ptr addrspace(21) [[TMP45]], align 4 +; POST-PROCESS-NEXT: [[TMP47:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP48:%.*]] = add i32 [[TMP47]], -120 +; POST-PROCESS-NEXT: [[TMP49:%.*]] = add i32 [[TMP48]], 132 ; POST-PROCESS-NEXT: [[TMP50:%.*]] = inttoptr i32 [[TMP49]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP51:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP50]], i32 -30 -; POST-PROCESS-NEXT: [[TMP52:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(21) [[TMP51]], i32 0, i32 0, i64 34 -; POST-PROCESS-NEXT: [[TMP53:%.*]] = load i32, ptr addrspace(21) [[TMP52]], align 4 -; POST-PROCESS-NEXT: [[TMP54:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP55:%.*]] = inttoptr i32 [[TMP54]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP56:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP55]], i32 -30 -; POST-PROCESS-NEXT: [[TMP57:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(21) [[TMP56]], i32 0, i32 0, i64 35 +; POST-PROCESS-NEXT: [[TMP51:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP50]], i32 0 +; POST-PROCESS-NEXT: [[TMP52:%.*]] = load i32, ptr addrspace(21) [[TMP51]], align 4 +; POST-PROCESS-NEXT: [[TMP53:%.*]] = 
load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP54:%.*]] = add i32 [[TMP53]], -120 +; POST-PROCESS-NEXT: [[TMP55:%.*]] = add i32 [[TMP54]], 136 +; POST-PROCESS-NEXT: [[TMP56:%.*]] = inttoptr i32 [[TMP55]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP57:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP56]], i32 0 ; POST-PROCESS-NEXT: [[TMP58:%.*]] = load i32, ptr addrspace(21) [[TMP57]], align 4 ; POST-PROCESS-NEXT: [[TMP59:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP60:%.*]] = inttoptr i32 [[TMP59]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP61:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP60]], i32 -30 -; POST-PROCESS-NEXT: [[TMP62:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(21) [[TMP61]], i32 0, i32 0, i64 36 -; POST-PROCESS-NEXT: [[TMP63:%.*]] = load i32, ptr addrspace(21) [[TMP62]], align 4 -; POST-PROCESS-NEXT: [[TMP64:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP65:%.*]] = inttoptr i32 [[TMP64]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP66:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP65]], i32 -30 -; POST-PROCESS-NEXT: [[TMP67:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(21) [[TMP66]], i32 0, i32 0, i64 37 -; POST-PROCESS-NEXT: [[TMP68:%.*]] = load i32, ptr addrspace(21) [[TMP67]], align 4 -; POST-PROCESS-NEXT: [[TMP69:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP70:%.*]] = inttoptr i32 [[TMP69]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP70]], i32 -30 -; POST-PROCESS-NEXT: [[TMP72:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(21) [[TMP71]], i32 0, i32 0, i64 38 -; POST-PROCESS-NEXT: [[TMP73:%.*]] = load i32, ptr 
addrspace(21) [[TMP72]], align 4 -; POST-PROCESS-NEXT: [[TMP74:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP75:%.*]] = inttoptr i32 [[TMP74]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP76:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP75]], i32 -30 -; POST-PROCESS-NEXT: [[TMP77:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(21) [[TMP76]], i32 0, i32 0, i64 39 -; POST-PROCESS-NEXT: [[TMP78:%.*]] = load i32, ptr addrspace(21) [[TMP77]], align 4 -; POST-PROCESS-NEXT: [[TMP79:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP60:%.*]] = add i32 [[TMP59]], -120 +; POST-PROCESS-NEXT: [[TMP61:%.*]] = add i32 [[TMP60]], 140 +; POST-PROCESS-NEXT: [[TMP62:%.*]] = inttoptr i32 [[TMP61]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP63:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP62]], i32 0 +; POST-PROCESS-NEXT: [[TMP64:%.*]] = load i32, ptr addrspace(21) [[TMP63]], align 4 +; POST-PROCESS-NEXT: [[TMP65:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP66:%.*]] = add i32 [[TMP65]], -120 +; POST-PROCESS-NEXT: [[TMP67:%.*]] = add i32 [[TMP66]], 144 +; POST-PROCESS-NEXT: [[TMP68:%.*]] = inttoptr i32 [[TMP67]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP69:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP68]], i32 0 +; POST-PROCESS-NEXT: [[TMP70:%.*]] = load i32, ptr addrspace(21) [[TMP69]], align 4 +; POST-PROCESS-NEXT: [[TMP71:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP72:%.*]] = add i32 [[TMP71]], -120 +; POST-PROCESS-NEXT: [[TMP73:%.*]] = add i32 [[TMP72]], 148 +; POST-PROCESS-NEXT: [[TMP74:%.*]] = inttoptr i32 [[TMP73]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP75:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP74]], i32 0 +; POST-PROCESS-NEXT: [[TMP76:%.*]] = load i32, ptr addrspace(21) [[TMP75]], align 4 +; POST-PROCESS-NEXT: 
[[TMP77:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP78:%.*]] = add i32 [[TMP77]], -120 +; POST-PROCESS-NEXT: [[TMP79:%.*]] = add i32 [[TMP78]], 152 ; POST-PROCESS-NEXT: [[TMP80:%.*]] = inttoptr i32 [[TMP79]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP81:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP80]], i32 -30 -; POST-PROCESS-NEXT: [[TMP82:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(21) [[TMP81]], i32 0, i32 0, i64 40 -; POST-PROCESS-NEXT: [[TMP83:%.*]] = load i32, ptr addrspace(21) [[TMP82]], align 4 -; POST-PROCESS-NEXT: [[TMP84:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP85:%.*]] = inttoptr i32 [[TMP84]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP86:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP85]], i32 -30 -; POST-PROCESS-NEXT: [[TMP87:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(21) [[TMP86]], i32 0, i32 0, i64 41 +; POST-PROCESS-NEXT: [[TMP81:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP80]], i32 0 +; POST-PROCESS-NEXT: [[TMP82:%.*]] = load i32, ptr addrspace(21) [[TMP81]], align 4 +; POST-PROCESS-NEXT: [[TMP83:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP84:%.*]] = add i32 [[TMP83]], -120 +; POST-PROCESS-NEXT: [[TMP85:%.*]] = add i32 [[TMP84]], 156 +; POST-PROCESS-NEXT: [[TMP86:%.*]] = inttoptr i32 [[TMP85]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP87:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP86]], i32 0 ; POST-PROCESS-NEXT: [[TMP88:%.*]] = load i32, ptr addrspace(21) [[TMP87]], align 4 ; POST-PROCESS-NEXT: [[TMP89:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP90:%.*]] = inttoptr i32 [[TMP89]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP91:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP90]], i32 -30 -; 
POST-PROCESS-NEXT: [[TMP92:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(21) [[TMP91]], i32 0, i32 0, i64 42 -; POST-PROCESS-NEXT: [[TMP93:%.*]] = load i32, ptr addrspace(21) [[TMP92]], align 4 -; POST-PROCESS-NEXT: [[TMP94:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP95:%.*]] = inttoptr i32 [[TMP94]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP96:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP95]], i32 -30 -; POST-PROCESS-NEXT: [[TMP97:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(21) [[TMP96]], i32 0, i32 0, i64 43 -; POST-PROCESS-NEXT: [[TMP98:%.*]] = load i32, ptr addrspace(21) [[TMP97]], align 4 -; POST-PROCESS-NEXT: [[TMP99:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP100:%.*]] = inttoptr i32 [[TMP99]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP101:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP100]], i32 -30 -; POST-PROCESS-NEXT: [[TMP102:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(21) [[TMP101]], i32 0, i32 0, i64 44 -; POST-PROCESS-NEXT: [[TMP103:%.*]] = load i32, ptr addrspace(21) [[TMP102]], align 4 -; POST-PROCESS-NEXT: [[TMP104:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP105:%.*]] = inttoptr i32 [[TMP104]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP106:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP105]], i32 -30 -; POST-PROCESS-NEXT: [[TMP107:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(21) [[TMP106]], i32 0, i32 0, i64 45 -; POST-PROCESS-NEXT: [[TMP108:%.*]] = load i32, ptr addrspace(21) [[TMP107]], align 4 -; POST-PROCESS-NEXT: [[TMP109:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP90:%.*]] = 
add i32 [[TMP89]], -120 +; POST-PROCESS-NEXT: [[TMP91:%.*]] = add i32 [[TMP90]], 160 +; POST-PROCESS-NEXT: [[TMP92:%.*]] = inttoptr i32 [[TMP91]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP93:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP92]], i32 0 +; POST-PROCESS-NEXT: [[TMP94:%.*]] = load i32, ptr addrspace(21) [[TMP93]], align 4 +; POST-PROCESS-NEXT: [[TMP95:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP96:%.*]] = add i32 [[TMP95]], -120 +; POST-PROCESS-NEXT: [[TMP97:%.*]] = add i32 [[TMP96]], 164 +; POST-PROCESS-NEXT: [[TMP98:%.*]] = inttoptr i32 [[TMP97]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP99:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP98]], i32 0 +; POST-PROCESS-NEXT: [[TMP100:%.*]] = load i32, ptr addrspace(21) [[TMP99]], align 4 +; POST-PROCESS-NEXT: [[TMP101:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP102:%.*]] = add i32 [[TMP101]], -120 +; POST-PROCESS-NEXT: [[TMP103:%.*]] = add i32 [[TMP102]], 168 +; POST-PROCESS-NEXT: [[TMP104:%.*]] = inttoptr i32 [[TMP103]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP105:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP104]], i32 0 +; POST-PROCESS-NEXT: [[TMP106:%.*]] = load i32, ptr addrspace(21) [[TMP105]], align 4 +; POST-PROCESS-NEXT: [[TMP107:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP108:%.*]] = add i32 [[TMP107]], -120 +; POST-PROCESS-NEXT: [[TMP109:%.*]] = add i32 [[TMP108]], 172 ; POST-PROCESS-NEXT: [[TMP110:%.*]] = inttoptr i32 [[TMP109]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP111:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP110]], i32 -30 -; POST-PROCESS-NEXT: [[TMP112:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(21) [[TMP111]], i32 0, i32 0, i64 46 -; POST-PROCESS-NEXT: [[TMP113:%.*]] = load i32, ptr addrspace(21) [[TMP112]], align 4 -; POST-PROCESS-NEXT: [[TMP114:%.*]] = load 
i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP115:%.*]] = inttoptr i32 [[TMP114]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP116:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP115]], i32 -30 -; POST-PROCESS-NEXT: [[TMP117:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(21) [[TMP116]], i32 0, i32 0, i64 47 +; POST-PROCESS-NEXT: [[TMP111:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP110]], i32 0 +; POST-PROCESS-NEXT: [[TMP112:%.*]] = load i32, ptr addrspace(21) [[TMP111]], align 4 +; POST-PROCESS-NEXT: [[TMP113:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP114:%.*]] = add i32 [[TMP113]], -120 +; POST-PROCESS-NEXT: [[TMP115:%.*]] = add i32 [[TMP114]], 176 +; POST-PROCESS-NEXT: [[TMP116:%.*]] = inttoptr i32 [[TMP115]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP117:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP116]], i32 0 ; POST-PROCESS-NEXT: [[TMP118:%.*]] = load i32, ptr addrspace(21) [[TMP117]], align 4 ; POST-PROCESS-NEXT: [[TMP119:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP120:%.*]] = inttoptr i32 [[TMP119]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP121:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP120]], i32 -30 -; POST-PROCESS-NEXT: [[TMP122:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(21) [[TMP121]], i32 0, i32 0, i64 48 -; POST-PROCESS-NEXT: [[TMP123:%.*]] = load i32, ptr addrspace(21) [[TMP122]], align 4 -; POST-PROCESS-NEXT: [[TMP124:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP125:%.*]] = inttoptr i32 [[TMP124]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP126:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP125]], i32 -30 -; POST-PROCESS-NEXT: [[TMP127:%.*]] = getelementptr 
[[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(21) [[TMP126]], i32 0, i32 0, i64 49 -; POST-PROCESS-NEXT: [[TMP128:%.*]] = load i32, ptr addrspace(21) [[TMP127]], align 4 -; POST-PROCESS-NEXT: [[TMP129:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP130:%.*]] = inttoptr i32 [[TMP129]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP131:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP130]], i32 -30 -; POST-PROCESS-NEXT: [[TMP132:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(21) [[TMP131]], i32 0, i32 0, i64 50 -; POST-PROCESS-NEXT: [[TMP133:%.*]] = load i32, ptr addrspace(21) [[TMP132]], align 4 -; POST-PROCESS-NEXT: [[TMP134:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP135:%.*]] = inttoptr i32 [[TMP134]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP136:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP135]], i32 -30 -; POST-PROCESS-NEXT: [[TMP137:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(21) [[TMP136]], i32 0, i32 0, i64 51 -; POST-PROCESS-NEXT: [[TMP138:%.*]] = load i32, ptr addrspace(21) [[TMP137]], align 4 -; POST-PROCESS-NEXT: [[TMP139:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP120:%.*]] = add i32 [[TMP119]], -120 +; POST-PROCESS-NEXT: [[TMP121:%.*]] = add i32 [[TMP120]], 180 +; POST-PROCESS-NEXT: [[TMP122:%.*]] = inttoptr i32 [[TMP121]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP123:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP122]], i32 0 +; POST-PROCESS-NEXT: [[TMP124:%.*]] = load i32, ptr addrspace(21) [[TMP123]], align 4 +; POST-PROCESS-NEXT: [[TMP125:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP126:%.*]] = add i32 [[TMP125]], -120 +; POST-PROCESS-NEXT: [[TMP127:%.*]] = add i32 [[TMP126]], 184 +; 
POST-PROCESS-NEXT: [[TMP128:%.*]] = inttoptr i32 [[TMP127]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP129:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP128]], i32 0 +; POST-PROCESS-NEXT: [[TMP130:%.*]] = load i32, ptr addrspace(21) [[TMP129]], align 4 +; POST-PROCESS-NEXT: [[TMP131:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP132:%.*]] = add i32 [[TMP131]], -120 +; POST-PROCESS-NEXT: [[TMP133:%.*]] = add i32 [[TMP132]], 188 +; POST-PROCESS-NEXT: [[TMP134:%.*]] = inttoptr i32 [[TMP133]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP135:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP134]], i32 0 +; POST-PROCESS-NEXT: [[TMP136:%.*]] = load i32, ptr addrspace(21) [[TMP135]], align 4 +; POST-PROCESS-NEXT: [[TMP137:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP138:%.*]] = add i32 [[TMP137]], -120 +; POST-PROCESS-NEXT: [[TMP139:%.*]] = add i32 [[TMP138]], 192 ; POST-PROCESS-NEXT: [[TMP140:%.*]] = inttoptr i32 [[TMP139]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP141:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP140]], i32 -30 -; POST-PROCESS-NEXT: [[TMP142:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(21) [[TMP141]], i32 0, i32 0, i64 52 -; POST-PROCESS-NEXT: [[TMP143:%.*]] = load i32, ptr addrspace(21) [[TMP142]], align 4 -; POST-PROCESS-NEXT: [[TMP144:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP145:%.*]] = inttoptr i32 [[TMP144]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP146:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP145]], i32 -30 -; POST-PROCESS-NEXT: [[TMP147:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(21) [[TMP146]], i32 0, i32 0, i64 53 +; POST-PROCESS-NEXT: [[TMP141:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP140]], i32 0 +; POST-PROCESS-NEXT: [[TMP142:%.*]] = load i32, 
ptr addrspace(21) [[TMP141]], align 4 +; POST-PROCESS-NEXT: [[TMP143:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP144:%.*]] = add i32 [[TMP143]], -120 +; POST-PROCESS-NEXT: [[TMP145:%.*]] = add i32 [[TMP144]], 196 +; POST-PROCESS-NEXT: [[TMP146:%.*]] = inttoptr i32 [[TMP145]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP147:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP146]], i32 0 ; POST-PROCESS-NEXT: [[TMP148:%.*]] = load i32, ptr addrspace(21) [[TMP147]], align 4 ; POST-PROCESS-NEXT: [[TMP149:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP150:%.*]] = inttoptr i32 [[TMP149]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP151:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP150]], i32 -30 -; POST-PROCESS-NEXT: [[TMP152:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(21) [[TMP151]], i32 0, i32 0, i64 54 -; POST-PROCESS-NEXT: [[TMP153:%.*]] = load i32, ptr addrspace(21) [[TMP152]], align 4 -; POST-PROCESS-NEXT: [[TMP154:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP155:%.*]] = inttoptr i32 [[TMP154]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP156:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP155]], i32 -30 -; POST-PROCESS-NEXT: [[TMP157:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(21) [[TMP156]], i32 0, i32 0, i64 55 -; POST-PROCESS-NEXT: [[TMP158:%.*]] = load i32, ptr addrspace(21) [[TMP157]], align 4 -; POST-PROCESS-NEXT: [[TMP159:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP160:%.*]] = inttoptr i32 [[TMP159]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP161:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP160]], i32 -30 -; POST-PROCESS-NEXT: [[TMP162:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr 
addrspace(21) [[TMP161]], i32 0, i32 0, i64 56 -; POST-PROCESS-NEXT: [[TMP163:%.*]] = load i32, ptr addrspace(21) [[TMP162]], align 4 -; POST-PROCESS-NEXT: [[TMP164:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[DOTSPILL_ADDR:%.*]] = getelementptr inbounds [[CLOSESTHIT_FRAME]], ptr addrspace(21) [[TMP3]], i32 0, i32 1 -; POST-PROCESS-NEXT: store i32 [[TMP164]], ptr addrspace(21) [[DOTSPILL_ADDR]], align 4 +; POST-PROCESS-NEXT: [[TMP150:%.*]] = add i32 [[TMP149]], -120 +; POST-PROCESS-NEXT: [[TMP151:%.*]] = add i32 [[TMP150]], 200 +; POST-PROCESS-NEXT: [[TMP152:%.*]] = inttoptr i32 [[TMP151]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP153:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP152]], i32 0 +; POST-PROCESS-NEXT: [[TMP154:%.*]] = load i32, ptr addrspace(21) [[TMP153]], align 4 +; POST-PROCESS-NEXT: [[TMP155:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP156:%.*]] = add i32 [[TMP155]], -120 +; POST-PROCESS-NEXT: [[TMP157:%.*]] = add i32 [[TMP156]], 204 +; POST-PROCESS-NEXT: [[TMP158:%.*]] = inttoptr i32 [[TMP157]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP159:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP158]], i32 0 +; POST-PROCESS-NEXT: [[TMP160:%.*]] = load i32, ptr addrspace(21) [[TMP159]], align 4 +; POST-PROCESS-NEXT: [[TMP161:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP162:%.*]] = add i32 [[TMP161]], -120 +; POST-PROCESS-NEXT: [[TMP163:%.*]] = add i32 [[TMP162]], 208 +; POST-PROCESS-NEXT: [[TMP164:%.*]] = inttoptr i32 [[TMP163]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP165:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP164]], i32 0 +; POST-PROCESS-NEXT: [[TMP166:%.*]] = load i32, ptr addrspace(21) [[TMP165]], align 4 +; POST-PROCESS-NEXT: [[TMP167:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP168:%.*]] = add i32 [[TMP167]], -120 +; POST-PROCESS-NEXT: [[TMP169:%.*]] = add i32 [[TMP168]], 
212 +; POST-PROCESS-NEXT: [[TMP170:%.*]] = inttoptr i32 [[TMP169]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP171:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP170]], i32 0 +; POST-PROCESS-NEXT: [[TMP172:%.*]] = load i32, ptr addrspace(21) [[TMP171]], align 4 +; POST-PROCESS-NEXT: [[TMP173:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP174:%.*]] = add i32 [[TMP173]], -120 +; POST-PROCESS-NEXT: [[TMP175:%.*]] = add i32 [[TMP174]], 216 +; POST-PROCESS-NEXT: [[TMP176:%.*]] = inttoptr i32 [[TMP175]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP177:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP176]], i32 0 +; POST-PROCESS-NEXT: [[TMP178:%.*]] = load i32, ptr addrspace(21) [[TMP177]], align 4 +; POST-PROCESS-NEXT: [[TMP179:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP180:%.*]] = add i32 [[TMP179]], -120 +; POST-PROCESS-NEXT: [[TMP181:%.*]] = add i32 [[TMP180]], 220 +; POST-PROCESS-NEXT: [[TMP182:%.*]] = inttoptr i32 [[TMP181]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP183:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP182]], i32 0 +; POST-PROCESS-NEXT: [[TMP184:%.*]] = load i32, ptr addrspace(21) [[TMP183]], align 4 +; POST-PROCESS-NEXT: [[TMP185:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP186:%.*]] = add i32 [[TMP185]], -120 +; POST-PROCESS-NEXT: [[TMP187:%.*]] = add i32 [[TMP186]], 224 +; POST-PROCESS-NEXT: [[TMP188:%.*]] = inttoptr i32 [[TMP187]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP189:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP188]], i32 0 +; POST-PROCESS-NEXT: [[TMP190:%.*]] = load i32, ptr addrspace(21) [[TMP189]], align 4 +; POST-PROCESS-NEXT: [[TMP191:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP192:%.*]] = add i32 [[TMP2]], 116 +; POST-PROCESS-NEXT: [[TMP193:%.*]] = inttoptr i32 [[TMP192]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP194:%.*]] = getelementptr i8, ptr 
addrspace(21) [[TMP193]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP191]], ptr addrspace(21) [[TMP194]], align 4 ; POST-PROCESS-NEXT: [[VAL_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> [[DOTFCA_1_0_EXTRACT]], 0 ; POST-PROCESS-NEXT: [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL_I_FCA_0_INSERT]], 0 ; POST-PROCESS-NEXT: [[DOTSROA_053_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 0 -; POST-PROCESS-NEXT: [[TMP165:%.*]] = bitcast float [[DOTSROA_053_0_VEC_EXTRACT]] to i32 +; POST-PROCESS-NEXT: [[TMP195:%.*]] = bitcast float [[DOTSROA_053_0_VEC_EXTRACT]] to i32 ; POST-PROCESS-NEXT: [[DOTSROA_053_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 1 -; POST-PROCESS-NEXT: [[TMP166:%.*]] = bitcast float [[DOTSROA_053_4_VEC_EXTRACT]] to i32 +; POST-PROCESS-NEXT: [[TMP196:%.*]] = bitcast float [[DOTSROA_053_4_VEC_EXTRACT]] to i32 ; POST-PROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; POST-PROCESS-NEXT: [[TMP167:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 -; POST-PROCESS-NEXT: [[TMP168:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 -; POST-PROCESS-NEXT: [[TMP169:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP167]]) -; POST-PROCESS-NEXT: [[TMP170:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP169]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) -; POST-PROCESS-NEXT: [[TMP171:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP170]]) +; POST-PROCESS-NEXT: [[TMP197:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 +; POST-PROCESS-NEXT: [[TMP198:%.*]] = load 
[[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 +; POST-PROCESS-NEXT: [[TMP199:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP197]]) +; POST-PROCESS-NEXT: [[TMP200:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP199]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; POST-PROCESS-NEXT: [[TMP201:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP200]]) ; POST-PROCESS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison, <3 x i32> [[DOTFCA_0_0_EXTRACT]], 0 ; POST-PROCESS-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]], 0 ; POST-PROCESS-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 -; POST-PROCESS-NEXT: [[TMP172:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: [[TMP173:%.*]] = add i32 [[TMP172]], -108 -; POST-PROCESS-NEXT: store i32 [[TMP173]], ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: store i32 [[TMP2]], ptr addrspace(20) @REGISTERS, align 4 ; POST-PROCESS-NEXT: store i32 [[TMP6]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 ; POST-PROCESS-NEXT: store i32 [[TMP7]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 ; POST-PROCESS-NEXT: store i32 [[TMP8]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 
0, i64 9) to ptr addrspace(20)), align 4 @@ -1812,490 +1917,571 @@ attributes #3 = { nounwind } ; POST-PROCESS-NEXT: store i32 [[TMP26]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 27) to ptr addrspace(20)), align 4 ; POST-PROCESS-NEXT: store i32 [[TMP27]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 28) to ptr addrspace(20)), align 4 ; POST-PROCESS-NEXT: store i32 [[TMP28]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 29) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP174:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP175:%.*]] = inttoptr i32 [[TMP174]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP176:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP175]], i32 -30 -; POST-PROCESS-NEXT: [[TMP177:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP176]], i32 0, i32 0, i64 30 -; POST-PROCESS-NEXT: store i32 [[TMP33]], ptr addrspace(21) [[TMP177]], align 4 -; POST-PROCESS-NEXT: [[TMP178:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP179:%.*]] = inttoptr i32 [[TMP178]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP180:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP179]], i32 -30 -; POST-PROCESS-NEXT: [[TMP181:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP180]], i32 0, i32 0, i64 31 -; POST-PROCESS-NEXT: store i32 [[TMP38]], ptr addrspace(21) [[TMP181]], align 4 -; POST-PROCESS-NEXT: [[TMP182:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; 
POST-PROCESS-NEXT: [[TMP183:%.*]] = inttoptr i32 [[TMP182]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP184:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP183]], i32 -30 -; POST-PROCESS-NEXT: [[TMP185:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP184]], i32 0, i32 0, i64 32 -; POST-PROCESS-NEXT: store i32 [[TMP43]], ptr addrspace(21) [[TMP185]], align 4 -; POST-PROCESS-NEXT: [[TMP186:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP187:%.*]] = inttoptr i32 [[TMP186]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP188:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP187]], i32 -30 -; POST-PROCESS-NEXT: [[TMP189:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP188]], i32 0, i32 0, i64 33 -; POST-PROCESS-NEXT: store i32 [[TMP48]], ptr addrspace(21) [[TMP189]], align 4 -; POST-PROCESS-NEXT: [[TMP190:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP191:%.*]] = inttoptr i32 [[TMP190]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP192:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP191]], i32 -30 -; POST-PROCESS-NEXT: [[TMP193:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP192]], i32 0, i32 0, i64 34 -; POST-PROCESS-NEXT: store i32 [[TMP53]], ptr addrspace(21) [[TMP193]], align 4 -; POST-PROCESS-NEXT: [[TMP194:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP195:%.*]] = inttoptr i32 [[TMP194]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP196:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP195]], i32 -30 -; POST-PROCESS-NEXT: [[TMP197:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP196]], i32 0, i32 0, i64 35 -; POST-PROCESS-NEXT: store i32 [[TMP58]], ptr addrspace(21) [[TMP197]], align 4 -; POST-PROCESS-NEXT: [[TMP198:%.*]] = load 
i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP199:%.*]] = inttoptr i32 [[TMP198]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP200:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP199]], i32 -30 -; POST-PROCESS-NEXT: [[TMP201:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP200]], i32 0, i32 0, i64 36 -; POST-PROCESS-NEXT: store i32 [[TMP63]], ptr addrspace(21) [[TMP201]], align 4 ; POST-PROCESS-NEXT: [[TMP202:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP203:%.*]] = inttoptr i32 [[TMP202]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP204:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP203]], i32 -30 -; POST-PROCESS-NEXT: [[TMP205:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP204]], i32 0, i32 0, i64 37 -; POST-PROCESS-NEXT: store i32 [[TMP68]], ptr addrspace(21) [[TMP205]], align 4 -; POST-PROCESS-NEXT: [[TMP206:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP207:%.*]] = inttoptr i32 [[TMP206]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP208:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP207]], i32 -30 -; POST-PROCESS-NEXT: [[TMP209:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP208]], i32 0, i32 0, i64 38 -; POST-PROCESS-NEXT: store i32 [[TMP73]], ptr addrspace(21) [[TMP209]], align 4 -; POST-PROCESS-NEXT: [[TMP210:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP211:%.*]] = inttoptr i32 [[TMP210]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP212:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP211]], i32 -30 -; POST-PROCESS-NEXT: [[TMP213:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP212]], i32 0, i32 0, i64 39 -; POST-PROCESS-NEXT: store i32 [[TMP78]], ptr addrspace(21) [[TMP213]], align 4 
-; POST-PROCESS-NEXT: [[TMP214:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP203:%.*]] = add i32 [[TMP202]], -120 +; POST-PROCESS-NEXT: [[TMP204:%.*]] = add i32 [[TMP203]], 120 +; POST-PROCESS-NEXT: [[TMP205:%.*]] = inttoptr i32 [[TMP204]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP206:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP205]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP34]], ptr addrspace(21) [[TMP206]], align 4 +; POST-PROCESS-NEXT: [[TMP207:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP208:%.*]] = add i32 [[TMP207]], -120 +; POST-PROCESS-NEXT: [[TMP209:%.*]] = add i32 [[TMP208]], 124 +; POST-PROCESS-NEXT: [[TMP210:%.*]] = inttoptr i32 [[TMP209]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP211:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP210]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP40]], ptr addrspace(21) [[TMP211]], align 4 +; POST-PROCESS-NEXT: [[TMP212:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP213:%.*]] = add i32 [[TMP212]], -120 +; POST-PROCESS-NEXT: [[TMP214:%.*]] = add i32 [[TMP213]], 128 ; POST-PROCESS-NEXT: [[TMP215:%.*]] = inttoptr i32 [[TMP214]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP216:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP215]], i32 -30 -; POST-PROCESS-NEXT: [[TMP217:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP216]], i32 0, i32 0, i64 40 -; POST-PROCESS-NEXT: store i32 [[TMP83]], ptr addrspace(21) [[TMP217]], align 4 -; POST-PROCESS-NEXT: [[TMP218:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP219:%.*]] = inttoptr i32 [[TMP218]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP220:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP219]], i32 -30 -; POST-PROCESS-NEXT: [[TMP221:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP220]], i32 0, 
i32 0, i64 41 -; POST-PROCESS-NEXT: store i32 [[TMP88]], ptr addrspace(21) [[TMP221]], align 4 +; POST-PROCESS-NEXT: [[TMP216:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP215]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP46]], ptr addrspace(21) [[TMP216]], align 4 +; POST-PROCESS-NEXT: [[TMP217:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP218:%.*]] = add i32 [[TMP217]], -120 +; POST-PROCESS-NEXT: [[TMP219:%.*]] = add i32 [[TMP218]], 132 +; POST-PROCESS-NEXT: [[TMP220:%.*]] = inttoptr i32 [[TMP219]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP221:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP220]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP52]], ptr addrspace(21) [[TMP221]], align 4 ; POST-PROCESS-NEXT: [[TMP222:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP223:%.*]] = inttoptr i32 [[TMP222]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP224:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP223]], i32 -30 -; POST-PROCESS-NEXT: [[TMP225:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP224]], i32 0, i32 0, i64 42 -; POST-PROCESS-NEXT: store i32 [[TMP93]], ptr addrspace(21) [[TMP225]], align 4 -; POST-PROCESS-NEXT: [[TMP226:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP227:%.*]] = inttoptr i32 [[TMP226]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP228:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP227]], i32 -30 -; POST-PROCESS-NEXT: [[TMP229:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP228]], i32 0, i32 0, i64 43 -; POST-PROCESS-NEXT: store i32 [[TMP98]], ptr addrspace(21) [[TMP229]], align 4 -; POST-PROCESS-NEXT: [[TMP230:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP231:%.*]] = inttoptr i32 [[TMP230]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP232:%.*]] = getelementptr i32, ptr 
addrspace(21) [[TMP231]], i32 -30 -; POST-PROCESS-NEXT: [[TMP233:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP232]], i32 0, i32 0, i64 44 -; POST-PROCESS-NEXT: store i32 [[TMP103]], ptr addrspace(21) [[TMP233]], align 4 -; POST-PROCESS-NEXT: [[TMP234:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP223:%.*]] = add i32 [[TMP222]], -120 +; POST-PROCESS-NEXT: [[TMP224:%.*]] = add i32 [[TMP223]], 136 +; POST-PROCESS-NEXT: [[TMP225:%.*]] = inttoptr i32 [[TMP224]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP226:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP225]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP58]], ptr addrspace(21) [[TMP226]], align 4 +; POST-PROCESS-NEXT: [[TMP227:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP228:%.*]] = add i32 [[TMP227]], -120 +; POST-PROCESS-NEXT: [[TMP229:%.*]] = add i32 [[TMP228]], 140 +; POST-PROCESS-NEXT: [[TMP230:%.*]] = inttoptr i32 [[TMP229]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP231:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP230]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP64]], ptr addrspace(21) [[TMP231]], align 4 +; POST-PROCESS-NEXT: [[TMP232:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP233:%.*]] = add i32 [[TMP232]], -120 +; POST-PROCESS-NEXT: [[TMP234:%.*]] = add i32 [[TMP233]], 144 ; POST-PROCESS-NEXT: [[TMP235:%.*]] = inttoptr i32 [[TMP234]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP236:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP235]], i32 -30 -; POST-PROCESS-NEXT: [[TMP237:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP236]], i32 0, i32 0, i64 45 -; POST-PROCESS-NEXT: store i32 [[TMP108]], ptr addrspace(21) [[TMP237]], align 4 -; POST-PROCESS-NEXT: [[TMP238:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP239:%.*]] = inttoptr i32 
[[TMP238]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP240:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP239]], i32 -30 -; POST-PROCESS-NEXT: [[TMP241:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP240]], i32 0, i32 0, i64 46 -; POST-PROCESS-NEXT: store i32 [[TMP113]], ptr addrspace(21) [[TMP241]], align 4 +; POST-PROCESS-NEXT: [[TMP236:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP235]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP70]], ptr addrspace(21) [[TMP236]], align 4 +; POST-PROCESS-NEXT: [[TMP237:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP238:%.*]] = add i32 [[TMP237]], -120 +; POST-PROCESS-NEXT: [[TMP239:%.*]] = add i32 [[TMP238]], 148 +; POST-PROCESS-NEXT: [[TMP240:%.*]] = inttoptr i32 [[TMP239]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP241:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP240]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP76]], ptr addrspace(21) [[TMP241]], align 4 ; POST-PROCESS-NEXT: [[TMP242:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP243:%.*]] = inttoptr i32 [[TMP242]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP244:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP243]], i32 -30 -; POST-PROCESS-NEXT: [[TMP245:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP244]], i32 0, i32 0, i64 47 -; POST-PROCESS-NEXT: store i32 [[TMP118]], ptr addrspace(21) [[TMP245]], align 4 -; POST-PROCESS-NEXT: [[TMP246:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP247:%.*]] = inttoptr i32 [[TMP246]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP248:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP247]], i32 -30 -; POST-PROCESS-NEXT: [[TMP249:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP248]], i32 0, i32 0, i64 48 -; POST-PROCESS-NEXT: store i32 
[[TMP123]], ptr addrspace(21) [[TMP249]], align 4 -; POST-PROCESS-NEXT: [[TMP250:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP251:%.*]] = inttoptr i32 [[TMP250]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP252:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP251]], i32 -30 -; POST-PROCESS-NEXT: [[TMP253:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP252]], i32 0, i32 0, i64 49 -; POST-PROCESS-NEXT: store i32 [[TMP128]], ptr addrspace(21) [[TMP253]], align 4 -; POST-PROCESS-NEXT: [[TMP254:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP243:%.*]] = add i32 [[TMP242]], -120 +; POST-PROCESS-NEXT: [[TMP244:%.*]] = add i32 [[TMP243]], 152 +; POST-PROCESS-NEXT: [[TMP245:%.*]] = inttoptr i32 [[TMP244]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP246:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP245]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP82]], ptr addrspace(21) [[TMP246]], align 4 +; POST-PROCESS-NEXT: [[TMP247:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP248:%.*]] = add i32 [[TMP247]], -120 +; POST-PROCESS-NEXT: [[TMP249:%.*]] = add i32 [[TMP248]], 156 +; POST-PROCESS-NEXT: [[TMP250:%.*]] = inttoptr i32 [[TMP249]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP251:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP250]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP88]], ptr addrspace(21) [[TMP251]], align 4 +; POST-PROCESS-NEXT: [[TMP252:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP253:%.*]] = add i32 [[TMP252]], -120 +; POST-PROCESS-NEXT: [[TMP254:%.*]] = add i32 [[TMP253]], 160 ; POST-PROCESS-NEXT: [[TMP255:%.*]] = inttoptr i32 [[TMP254]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP256:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP255]], i32 -30 -; POST-PROCESS-NEXT: [[TMP257:%.*]] = getelementptr 
[[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP256]], i32 0, i32 0, i64 50 -; POST-PROCESS-NEXT: store i32 [[TMP133]], ptr addrspace(21) [[TMP257]], align 4 -; POST-PROCESS-NEXT: [[TMP258:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP259:%.*]] = inttoptr i32 [[TMP258]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP260:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP259]], i32 -30 -; POST-PROCESS-NEXT: [[TMP261:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP260]], i32 0, i32 0, i64 51 -; POST-PROCESS-NEXT: store i32 [[TMP138]], ptr addrspace(21) [[TMP261]], align 4 +; POST-PROCESS-NEXT: [[TMP256:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP255]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP94]], ptr addrspace(21) [[TMP256]], align 4 +; POST-PROCESS-NEXT: [[TMP257:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP258:%.*]] = add i32 [[TMP257]], -120 +; POST-PROCESS-NEXT: [[TMP259:%.*]] = add i32 [[TMP258]], 164 +; POST-PROCESS-NEXT: [[TMP260:%.*]] = inttoptr i32 [[TMP259]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP261:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP260]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP100]], ptr addrspace(21) [[TMP261]], align 4 ; POST-PROCESS-NEXT: [[TMP262:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP263:%.*]] = inttoptr i32 [[TMP262]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP264:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP263]], i32 -30 -; POST-PROCESS-NEXT: [[TMP265:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP264]], i32 0, i32 0, i64 52 -; POST-PROCESS-NEXT: store i32 [[TMP143]], ptr addrspace(21) [[TMP265]], align 4 -; POST-PROCESS-NEXT: [[TMP266:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP267:%.*]] = inttoptr 
i32 [[TMP266]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP268:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP267]], i32 -30 -; POST-PROCESS-NEXT: [[TMP269:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP268]], i32 0, i32 0, i64 53 -; POST-PROCESS-NEXT: store i32 [[TMP148]], ptr addrspace(21) [[TMP269]], align 4 -; POST-PROCESS-NEXT: [[TMP270:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP271:%.*]] = inttoptr i32 [[TMP270]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP272:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP271]], i32 -30 -; POST-PROCESS-NEXT: [[TMP273:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP272]], i32 0, i32 0, i64 54 -; POST-PROCESS-NEXT: store i32 [[TMP153]], ptr addrspace(21) [[TMP273]], align 4 -; POST-PROCESS-NEXT: [[TMP274:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP263:%.*]] = add i32 [[TMP262]], -120 +; POST-PROCESS-NEXT: [[TMP264:%.*]] = add i32 [[TMP263]], 168 +; POST-PROCESS-NEXT: [[TMP265:%.*]] = inttoptr i32 [[TMP264]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP266:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP265]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP106]], ptr addrspace(21) [[TMP266]], align 4 +; POST-PROCESS-NEXT: [[TMP267:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP268:%.*]] = add i32 [[TMP267]], -120 +; POST-PROCESS-NEXT: [[TMP269:%.*]] = add i32 [[TMP268]], 172 +; POST-PROCESS-NEXT: [[TMP270:%.*]] = inttoptr i32 [[TMP269]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP271:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP270]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP112]], ptr addrspace(21) [[TMP271]], align 4 +; POST-PROCESS-NEXT: [[TMP272:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP273:%.*]] = add i32 [[TMP272]], -120 +; 
POST-PROCESS-NEXT: [[TMP274:%.*]] = add i32 [[TMP273]], 176 ; POST-PROCESS-NEXT: [[TMP275:%.*]] = inttoptr i32 [[TMP274]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP276:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP275]], i32 -30 -; POST-PROCESS-NEXT: [[TMP277:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP276]], i32 0, i32 0, i64 55 -; POST-PROCESS-NEXT: store i32 [[TMP158]], ptr addrspace(21) [[TMP277]], align 4 -; POST-PROCESS-NEXT: [[TMP278:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP279:%.*]] = inttoptr i32 [[TMP278]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP280:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP279]], i32 -30 -; POST-PROCESS-NEXT: [[TMP281:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP280]], i32 0, i32 0, i64 56 -; POST-PROCESS-NEXT: store i32 [[TMP163]], ptr addrspace(21) [[TMP281]], align 4 -; POST-PROCESS-NEXT: [[TMP282:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: [[TMP283:%.*]] = add i32 [[TMP282]], 12 -; POST-PROCESS-NEXT: store i32 [[TMP283]], ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: [[TMP284:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: [[TMP285:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @ClosestHit.resume.0 to i64)) -; POST-PROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 4, i32 [[TMP284]], i64 [[TMP285]], [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]]), !continuation.registercount [[META17]], !continuation.returnedRegistercount !17 +; POST-PROCESS-NEXT: [[TMP276:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP275]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP118]], ptr addrspace(21) [[TMP276]], align 4 +; POST-PROCESS-NEXT: [[TMP277:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP278:%.*]] = add i32 [[TMP277]], -120 +; POST-PROCESS-NEXT: [[TMP279:%.*]] = add i32 [[TMP278]], 180 +; POST-PROCESS-NEXT: [[TMP280:%.*]] = inttoptr i32 [[TMP279]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP281:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP280]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP124]], ptr addrspace(21) [[TMP281]], align 4 +; POST-PROCESS-NEXT: [[TMP282:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP283:%.*]] = add i32 [[TMP282]], -120 +; POST-PROCESS-NEXT: [[TMP284:%.*]] = add i32 [[TMP283]], 184 +; POST-PROCESS-NEXT: [[TMP285:%.*]] = inttoptr i32 [[TMP284]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP286:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP285]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP130]], ptr addrspace(21) [[TMP286]], align 4 +; POST-PROCESS-NEXT: [[TMP287:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP288:%.*]] = add i32 [[TMP287]], -120 +; POST-PROCESS-NEXT: [[TMP289:%.*]] = add i32 [[TMP288]], 188 +; POST-PROCESS-NEXT: [[TMP290:%.*]] = inttoptr i32 [[TMP289]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP291:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP290]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP136]], ptr addrspace(21) [[TMP291]], align 4 +; POST-PROCESS-NEXT: [[TMP292:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP293:%.*]] = add i32 [[TMP292]], -120 +; POST-PROCESS-NEXT: [[TMP294:%.*]] = add i32 [[TMP293]], 192 +; 
POST-PROCESS-NEXT: [[TMP295:%.*]] = inttoptr i32 [[TMP294]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP296:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP295]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP142]], ptr addrspace(21) [[TMP296]], align 4 +; POST-PROCESS-NEXT: [[TMP297:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP298:%.*]] = add i32 [[TMP297]], -120 +; POST-PROCESS-NEXT: [[TMP299:%.*]] = add i32 [[TMP298]], 196 +; POST-PROCESS-NEXT: [[TMP300:%.*]] = inttoptr i32 [[TMP299]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP301:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP300]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP148]], ptr addrspace(21) [[TMP301]], align 4 +; POST-PROCESS-NEXT: [[TMP302:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP303:%.*]] = add i32 [[TMP302]], -120 +; POST-PROCESS-NEXT: [[TMP304:%.*]] = add i32 [[TMP303]], 200 +; POST-PROCESS-NEXT: [[TMP305:%.*]] = inttoptr i32 [[TMP304]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP306:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP305]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP154]], ptr addrspace(21) [[TMP306]], align 4 +; POST-PROCESS-NEXT: [[TMP307:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP308:%.*]] = add i32 [[TMP307]], -120 +; POST-PROCESS-NEXT: [[TMP309:%.*]] = add i32 [[TMP308]], 204 +; POST-PROCESS-NEXT: [[TMP310:%.*]] = inttoptr i32 [[TMP309]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP311:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP310]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP160]], ptr addrspace(21) [[TMP311]], align 4 +; POST-PROCESS-NEXT: [[TMP312:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP313:%.*]] = add i32 [[TMP312]], -120 +; POST-PROCESS-NEXT: [[TMP314:%.*]] = add i32 [[TMP313]], 208 +; POST-PROCESS-NEXT: [[TMP315:%.*]] = inttoptr i32 [[TMP314]] to ptr addrspace(21) +; POST-PROCESS-NEXT: 
[[TMP316:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP315]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP166]], ptr addrspace(21) [[TMP316]], align 4 +; POST-PROCESS-NEXT: [[TMP317:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP318:%.*]] = add i32 [[TMP317]], -120 +; POST-PROCESS-NEXT: [[TMP319:%.*]] = add i32 [[TMP318]], 212 +; POST-PROCESS-NEXT: [[TMP320:%.*]] = inttoptr i32 [[TMP319]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP321:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP320]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP172]], ptr addrspace(21) [[TMP321]], align 4 +; POST-PROCESS-NEXT: [[TMP322:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP323:%.*]] = add i32 [[TMP322]], -120 +; POST-PROCESS-NEXT: [[TMP324:%.*]] = add i32 [[TMP323]], 216 +; POST-PROCESS-NEXT: [[TMP325:%.*]] = inttoptr i32 [[TMP324]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP326:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP325]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP178]], ptr addrspace(21) [[TMP326]], align 4 +; POST-PROCESS-NEXT: [[TMP327:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP328:%.*]] = add i32 [[TMP327]], -120 +; POST-PROCESS-NEXT: [[TMP329:%.*]] = add i32 [[TMP328]], 220 +; POST-PROCESS-NEXT: [[TMP330:%.*]] = inttoptr i32 [[TMP329]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP331:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP330]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP184]], ptr addrspace(21) [[TMP331]], align 4 +; POST-PROCESS-NEXT: [[TMP332:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP333:%.*]] = add i32 [[TMP332]], -120 +; POST-PROCESS-NEXT: [[TMP334:%.*]] = add i32 [[TMP333]], 224 +; POST-PROCESS-NEXT: [[TMP335:%.*]] = inttoptr i32 [[TMP334]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP336:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP335]], i32 0 +; POST-PROCESS-NEXT: store i32 
[[TMP190]], ptr addrspace(21) [[TMP336]], align 4 +; POST-PROCESS-NEXT: [[TMP337:%.*]] = load i32, ptr [[CSP]], align 4 +; POST-PROCESS-NEXT: [[TMP338:%.*]] = add i32 [[TMP337]], 120 +; POST-PROCESS-NEXT: store i32 [[TMP338]], ptr [[CSP]], align 4 +; POST-PROCESS-NEXT: [[TMP339:%.*]] = load i32, ptr [[CSP]], align 4 +; POST-PROCESS-NEXT: [[TMP340:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @ClosestHit.resume.0 to i64)) +; POST-PROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 4, i32 [[TMP339]], i64 [[TMP340]], [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]]), !continuation.registercount [[META17]], !continuation.returnedRegistercount !17 ; POST-PROCESS-NEXT: unreachable ; ; ; POST-PROCESS-LABEL: define dso_local void @ClosestHit.resume.0( -; POST-PROCESS-SAME: i32 [[TMP0:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP1:%.*]]) !lgc.rt.shaderstage [[META24]] !continuation.registercount [[META17]] !continuation [[META25]] { +; POST-PROCESS-SAME: i32 [[CSPINIT:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META24]] !continuation.registercount [[META17]] !continuation [[META25]] { ; POST-PROCESS-NEXT: entryresume.0: ; POST-PROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 -; POST-PROCESS-NEXT: store i32 [[TMP0]], ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: [[TMP2:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], -12 -; POST-PROCESS-NEXT: store i32 [[TMP3]], ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: [[TMP4:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP4]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP5]], i64 0 -; POST-PROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr 
addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 10) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 11) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 12) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 13) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 14) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) 
addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 15) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 16) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 17) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 18) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 19) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 20) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 21) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 22) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 23) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 24) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 25) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 26) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 27) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP28:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 28) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP29:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr 
addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 29) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP30:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; POST-PROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 +; POST-PROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], -120 +; POST-PROCESS-NEXT: store i32 [[TMP2]], ptr [[CSP]], align 4 +; POST-PROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr [[CSP]], align 4 +; POST-PROCESS-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 0 +; POST-PROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 10) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 11) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) 
addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 12) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 13) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 14) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 15) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 16) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 17) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 18) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 19) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 20) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 21) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 22) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 23) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 24) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 25) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr 
addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 26) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 27) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 28) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 29) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: [[TMP28:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP29:%.*]] = add i32 [[TMP28]], -120 +; POST-PROCESS-NEXT: [[TMP30:%.*]] = add i32 [[TMP29]], 120 ; POST-PROCESS-NEXT: [[TMP31:%.*]] = inttoptr i32 [[TMP30]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP32:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP31]], i32 -30 -; POST-PROCESS-NEXT: [[TMP33:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP32]], i32 0, i32 0, i64 30 -; POST-PROCESS-NEXT: [[TMP34:%.*]] = load i32, ptr addrspace(21) [[TMP33]], align 4 -; POST-PROCESS-NEXT: [[TMP35:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP36:%.*]] = inttoptr i32 [[TMP35]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP37:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP36]], i32 -30 -; POST-PROCESS-NEXT: [[TMP38:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP37]], i32 0, i32 0, i64 31 +; POST-PROCESS-NEXT: 
[[TMP32:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP31]], i32 0 +; POST-PROCESS-NEXT: [[TMP33:%.*]] = load i32, ptr addrspace(21) [[TMP32]], align 4 +; POST-PROCESS-NEXT: [[TMP34:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP35:%.*]] = add i32 [[TMP34]], -120 +; POST-PROCESS-NEXT: [[TMP36:%.*]] = add i32 [[TMP35]], 124 +; POST-PROCESS-NEXT: [[TMP37:%.*]] = inttoptr i32 [[TMP36]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP38:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP37]], i32 0 ; POST-PROCESS-NEXT: [[TMP39:%.*]] = load i32, ptr addrspace(21) [[TMP38]], align 4 ; POST-PROCESS-NEXT: [[TMP40:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP41:%.*]] = inttoptr i32 [[TMP40]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP42:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP41]], i32 -30 -; POST-PROCESS-NEXT: [[TMP43:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP42]], i32 0, i32 0, i64 32 -; POST-PROCESS-NEXT: [[TMP44:%.*]] = load i32, ptr addrspace(21) [[TMP43]], align 4 -; POST-PROCESS-NEXT: [[TMP45:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP46:%.*]] = inttoptr i32 [[TMP45]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP47:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP46]], i32 -30 -; POST-PROCESS-NEXT: [[TMP48:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP47]], i32 0, i32 0, i64 33 -; POST-PROCESS-NEXT: [[TMP49:%.*]] = load i32, ptr addrspace(21) [[TMP48]], align 4 -; POST-PROCESS-NEXT: [[TMP50:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP51:%.*]] = inttoptr i32 [[TMP50]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP52:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP51]], i32 -30 -; POST-PROCESS-NEXT: [[TMP53:%.*]] = getelementptr 
[[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP52]], i32 0, i32 0, i64 34 -; POST-PROCESS-NEXT: [[TMP54:%.*]] = load i32, ptr addrspace(21) [[TMP53]], align 4 -; POST-PROCESS-NEXT: [[TMP55:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP56:%.*]] = inttoptr i32 [[TMP55]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP57:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP56]], i32 -30 -; POST-PROCESS-NEXT: [[TMP58:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP57]], i32 0, i32 0, i64 35 -; POST-PROCESS-NEXT: [[TMP59:%.*]] = load i32, ptr addrspace(21) [[TMP58]], align 4 -; POST-PROCESS-NEXT: [[TMP60:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP41:%.*]] = add i32 [[TMP40]], -120 +; POST-PROCESS-NEXT: [[TMP42:%.*]] = add i32 [[TMP41]], 128 +; POST-PROCESS-NEXT: [[TMP43:%.*]] = inttoptr i32 [[TMP42]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP43]], i32 0 +; POST-PROCESS-NEXT: [[TMP45:%.*]] = load i32, ptr addrspace(21) [[TMP44]], align 4 +; POST-PROCESS-NEXT: [[TMP46:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP47:%.*]] = add i32 [[TMP46]], -120 +; POST-PROCESS-NEXT: [[TMP48:%.*]] = add i32 [[TMP47]], 132 +; POST-PROCESS-NEXT: [[TMP49:%.*]] = inttoptr i32 [[TMP48]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP49]], i32 0 +; POST-PROCESS-NEXT: [[TMP51:%.*]] = load i32, ptr addrspace(21) [[TMP50]], align 4 +; POST-PROCESS-NEXT: [[TMP52:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP53:%.*]] = add i32 [[TMP52]], -120 +; POST-PROCESS-NEXT: [[TMP54:%.*]] = add i32 [[TMP53]], 136 +; POST-PROCESS-NEXT: [[TMP55:%.*]] = inttoptr i32 [[TMP54]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP56:%.*]] = getelementptr i8, ptr 
addrspace(21) [[TMP55]], i32 0 +; POST-PROCESS-NEXT: [[TMP57:%.*]] = load i32, ptr addrspace(21) [[TMP56]], align 4 +; POST-PROCESS-NEXT: [[TMP58:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP59:%.*]] = add i32 [[TMP58]], -120 +; POST-PROCESS-NEXT: [[TMP60:%.*]] = add i32 [[TMP59]], 140 ; POST-PROCESS-NEXT: [[TMP61:%.*]] = inttoptr i32 [[TMP60]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP62:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP61]], i32 -30 -; POST-PROCESS-NEXT: [[TMP63:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP62]], i32 0, i32 0, i64 36 -; POST-PROCESS-NEXT: [[TMP64:%.*]] = load i32, ptr addrspace(21) [[TMP63]], align 4 -; POST-PROCESS-NEXT: [[TMP65:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP66:%.*]] = inttoptr i32 [[TMP65]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP67:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP66]], i32 -30 -; POST-PROCESS-NEXT: [[TMP68:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP67]], i32 0, i32 0, i64 37 +; POST-PROCESS-NEXT: [[TMP62:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP61]], i32 0 +; POST-PROCESS-NEXT: [[TMP63:%.*]] = load i32, ptr addrspace(21) [[TMP62]], align 4 +; POST-PROCESS-NEXT: [[TMP64:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP65:%.*]] = add i32 [[TMP64]], -120 +; POST-PROCESS-NEXT: [[TMP66:%.*]] = add i32 [[TMP65]], 144 +; POST-PROCESS-NEXT: [[TMP67:%.*]] = inttoptr i32 [[TMP66]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP68:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP67]], i32 0 ; POST-PROCESS-NEXT: [[TMP69:%.*]] = load i32, ptr addrspace(21) [[TMP68]], align 4 ; POST-PROCESS-NEXT: [[TMP70:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP71:%.*]] = inttoptr i32 [[TMP70]] to ptr addrspace(21) -; 
POST-PROCESS-NEXT: [[TMP72:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP71]], i32 -30 -; POST-PROCESS-NEXT: [[TMP73:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP72]], i32 0, i32 0, i64 38 -; POST-PROCESS-NEXT: [[TMP74:%.*]] = load i32, ptr addrspace(21) [[TMP73]], align 4 -; POST-PROCESS-NEXT: [[TMP75:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP76:%.*]] = inttoptr i32 [[TMP75]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP77:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP76]], i32 -30 -; POST-PROCESS-NEXT: [[TMP78:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP77]], i32 0, i32 0, i64 39 -; POST-PROCESS-NEXT: [[TMP79:%.*]] = load i32, ptr addrspace(21) [[TMP78]], align 4 -; POST-PROCESS-NEXT: [[TMP80:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP81:%.*]] = inttoptr i32 [[TMP80]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP82:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP81]], i32 -30 -; POST-PROCESS-NEXT: [[TMP83:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP82]], i32 0, i32 0, i64 40 -; POST-PROCESS-NEXT: [[TMP84:%.*]] = load i32, ptr addrspace(21) [[TMP83]], align 4 -; POST-PROCESS-NEXT: [[TMP85:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP86:%.*]] = inttoptr i32 [[TMP85]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP87:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP86]], i32 -30 -; POST-PROCESS-NEXT: [[TMP88:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP87]], i32 0, i32 0, i64 41 -; POST-PROCESS-NEXT: [[TMP89:%.*]] = load i32, ptr addrspace(21) [[TMP88]], align 4 -; POST-PROCESS-NEXT: [[TMP90:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP71:%.*]] 
= add i32 [[TMP70]], -120 +; POST-PROCESS-NEXT: [[TMP72:%.*]] = add i32 [[TMP71]], 148 +; POST-PROCESS-NEXT: [[TMP73:%.*]] = inttoptr i32 [[TMP72]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP74:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP73]], i32 0 +; POST-PROCESS-NEXT: [[TMP75:%.*]] = load i32, ptr addrspace(21) [[TMP74]], align 4 +; POST-PROCESS-NEXT: [[TMP76:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP77:%.*]] = add i32 [[TMP76]], -120 +; POST-PROCESS-NEXT: [[TMP78:%.*]] = add i32 [[TMP77]], 152 +; POST-PROCESS-NEXT: [[TMP79:%.*]] = inttoptr i32 [[TMP78]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP80:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP79]], i32 0 +; POST-PROCESS-NEXT: [[TMP81:%.*]] = load i32, ptr addrspace(21) [[TMP80]], align 4 +; POST-PROCESS-NEXT: [[TMP82:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP83:%.*]] = add i32 [[TMP82]], -120 +; POST-PROCESS-NEXT: [[TMP84:%.*]] = add i32 [[TMP83]], 156 +; POST-PROCESS-NEXT: [[TMP85:%.*]] = inttoptr i32 [[TMP84]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP86:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP85]], i32 0 +; POST-PROCESS-NEXT: [[TMP87:%.*]] = load i32, ptr addrspace(21) [[TMP86]], align 4 +; POST-PROCESS-NEXT: [[TMP88:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP89:%.*]] = add i32 [[TMP88]], -120 +; POST-PROCESS-NEXT: [[TMP90:%.*]] = add i32 [[TMP89]], 160 ; POST-PROCESS-NEXT: [[TMP91:%.*]] = inttoptr i32 [[TMP90]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP92:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP91]], i32 -30 -; POST-PROCESS-NEXT: [[TMP93:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP92]], i32 0, i32 0, i64 42 -; POST-PROCESS-NEXT: [[TMP94:%.*]] = load i32, ptr addrspace(21) [[TMP93]], align 4 -; POST-PROCESS-NEXT: [[TMP95:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 
-; POST-PROCESS-NEXT: [[TMP96:%.*]] = inttoptr i32 [[TMP95]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP97:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP96]], i32 -30 -; POST-PROCESS-NEXT: [[TMP98:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP97]], i32 0, i32 0, i64 43 +; POST-PROCESS-NEXT: [[TMP92:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP91]], i32 0 +; POST-PROCESS-NEXT: [[TMP93:%.*]] = load i32, ptr addrspace(21) [[TMP92]], align 4 +; POST-PROCESS-NEXT: [[TMP94:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP95:%.*]] = add i32 [[TMP94]], -120 +; POST-PROCESS-NEXT: [[TMP96:%.*]] = add i32 [[TMP95]], 164 +; POST-PROCESS-NEXT: [[TMP97:%.*]] = inttoptr i32 [[TMP96]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP98:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP97]], i32 0 ; POST-PROCESS-NEXT: [[TMP99:%.*]] = load i32, ptr addrspace(21) [[TMP98]], align 4 ; POST-PROCESS-NEXT: [[TMP100:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP101:%.*]] = inttoptr i32 [[TMP100]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP102:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP101]], i32 -30 -; POST-PROCESS-NEXT: [[TMP103:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP102]], i32 0, i32 0, i64 44 -; POST-PROCESS-NEXT: [[TMP104:%.*]] = load i32, ptr addrspace(21) [[TMP103]], align 4 -; POST-PROCESS-NEXT: [[TMP105:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP106:%.*]] = inttoptr i32 [[TMP105]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP107:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP106]], i32 -30 -; POST-PROCESS-NEXT: [[TMP108:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP107]], i32 0, i32 0, i64 45 -; POST-PROCESS-NEXT: [[TMP109:%.*]] = load i32, ptr 
addrspace(21) [[TMP108]], align 4 -; POST-PROCESS-NEXT: [[TMP110:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP111:%.*]] = inttoptr i32 [[TMP110]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP112:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP111]], i32 -30 -; POST-PROCESS-NEXT: [[TMP113:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP112]], i32 0, i32 0, i64 46 -; POST-PROCESS-NEXT: [[TMP114:%.*]] = load i32, ptr addrspace(21) [[TMP113]], align 4 -; POST-PROCESS-NEXT: [[TMP115:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP116:%.*]] = inttoptr i32 [[TMP115]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP117:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP116]], i32 -30 -; POST-PROCESS-NEXT: [[TMP118:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP117]], i32 0, i32 0, i64 47 -; POST-PROCESS-NEXT: [[TMP119:%.*]] = load i32, ptr addrspace(21) [[TMP118]], align 4 -; POST-PROCESS-NEXT: [[TMP120:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP101:%.*]] = add i32 [[TMP100]], -120 +; POST-PROCESS-NEXT: [[TMP102:%.*]] = add i32 [[TMP101]], 168 +; POST-PROCESS-NEXT: [[TMP103:%.*]] = inttoptr i32 [[TMP102]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP104:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP103]], i32 0 +; POST-PROCESS-NEXT: [[TMP105:%.*]] = load i32, ptr addrspace(21) [[TMP104]], align 4 +; POST-PROCESS-NEXT: [[TMP106:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP107:%.*]] = add i32 [[TMP106]], -120 +; POST-PROCESS-NEXT: [[TMP108:%.*]] = add i32 [[TMP107]], 172 +; POST-PROCESS-NEXT: [[TMP109:%.*]] = inttoptr i32 [[TMP108]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP110:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP109]], i32 0 +; POST-PROCESS-NEXT: [[TMP111:%.*]] = load i32, ptr 
addrspace(21) [[TMP110]], align 4 +; POST-PROCESS-NEXT: [[TMP112:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP113:%.*]] = add i32 [[TMP112]], -120 +; POST-PROCESS-NEXT: [[TMP114:%.*]] = add i32 [[TMP113]], 176 +; POST-PROCESS-NEXT: [[TMP115:%.*]] = inttoptr i32 [[TMP114]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP116:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP115]], i32 0 +; POST-PROCESS-NEXT: [[TMP117:%.*]] = load i32, ptr addrspace(21) [[TMP116]], align 4 +; POST-PROCESS-NEXT: [[TMP118:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP119:%.*]] = add i32 [[TMP118]], -120 +; POST-PROCESS-NEXT: [[TMP120:%.*]] = add i32 [[TMP119]], 180 ; POST-PROCESS-NEXT: [[TMP121:%.*]] = inttoptr i32 [[TMP120]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP122:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP121]], i32 -30 -; POST-PROCESS-NEXT: [[TMP123:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP122]], i32 0, i32 0, i64 48 -; POST-PROCESS-NEXT: [[TMP124:%.*]] = load i32, ptr addrspace(21) [[TMP123]], align 4 -; POST-PROCESS-NEXT: [[TMP125:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP126:%.*]] = inttoptr i32 [[TMP125]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP127:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP126]], i32 -30 -; POST-PROCESS-NEXT: [[TMP128:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP127]], i32 0, i32 0, i64 49 +; POST-PROCESS-NEXT: [[TMP122:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP121]], i32 0 +; POST-PROCESS-NEXT: [[TMP123:%.*]] = load i32, ptr addrspace(21) [[TMP122]], align 4 +; POST-PROCESS-NEXT: [[TMP124:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP125:%.*]] = add i32 [[TMP124]], -120 +; POST-PROCESS-NEXT: [[TMP126:%.*]] = add i32 [[TMP125]], 184 +; 
POST-PROCESS-NEXT: [[TMP127:%.*]] = inttoptr i32 [[TMP126]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP128:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP127]], i32 0 ; POST-PROCESS-NEXT: [[TMP129:%.*]] = load i32, ptr addrspace(21) [[TMP128]], align 4 ; POST-PROCESS-NEXT: [[TMP130:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP131:%.*]] = inttoptr i32 [[TMP130]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP132:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP131]], i32 -30 -; POST-PROCESS-NEXT: [[TMP133:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP132]], i32 0, i32 0, i64 50 -; POST-PROCESS-NEXT: [[TMP134:%.*]] = load i32, ptr addrspace(21) [[TMP133]], align 4 -; POST-PROCESS-NEXT: [[TMP135:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP136:%.*]] = inttoptr i32 [[TMP135]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP137:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP136]], i32 -30 -; POST-PROCESS-NEXT: [[TMP138:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP137]], i32 0, i32 0, i64 51 -; POST-PROCESS-NEXT: [[TMP139:%.*]] = load i32, ptr addrspace(21) [[TMP138]], align 4 -; POST-PROCESS-NEXT: [[TMP140:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP141:%.*]] = inttoptr i32 [[TMP140]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP142:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP141]], i32 -30 -; POST-PROCESS-NEXT: [[TMP143:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP142]], i32 0, i32 0, i64 52 -; POST-PROCESS-NEXT: [[TMP144:%.*]] = load i32, ptr addrspace(21) [[TMP143]], align 4 -; POST-PROCESS-NEXT: [[TMP145:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP146:%.*]] = inttoptr i32 [[TMP145]] to ptr addrspace(21) -; 
POST-PROCESS-NEXT: [[TMP147:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP146]], i32 -30 -; POST-PROCESS-NEXT: [[TMP148:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP147]], i32 0, i32 0, i64 53 -; POST-PROCESS-NEXT: [[TMP149:%.*]] = load i32, ptr addrspace(21) [[TMP148]], align 4 -; POST-PROCESS-NEXT: [[TMP150:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP131:%.*]] = add i32 [[TMP130]], -120 +; POST-PROCESS-NEXT: [[TMP132:%.*]] = add i32 [[TMP131]], 188 +; POST-PROCESS-NEXT: [[TMP133:%.*]] = inttoptr i32 [[TMP132]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP134:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP133]], i32 0 +; POST-PROCESS-NEXT: [[TMP135:%.*]] = load i32, ptr addrspace(21) [[TMP134]], align 4 +; POST-PROCESS-NEXT: [[TMP136:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP137:%.*]] = add i32 [[TMP136]], -120 +; POST-PROCESS-NEXT: [[TMP138:%.*]] = add i32 [[TMP137]], 192 +; POST-PROCESS-NEXT: [[TMP139:%.*]] = inttoptr i32 [[TMP138]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP140:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP139]], i32 0 +; POST-PROCESS-NEXT: [[TMP141:%.*]] = load i32, ptr addrspace(21) [[TMP140]], align 4 +; POST-PROCESS-NEXT: [[TMP142:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP143:%.*]] = add i32 [[TMP142]], -120 +; POST-PROCESS-NEXT: [[TMP144:%.*]] = add i32 [[TMP143]], 196 +; POST-PROCESS-NEXT: [[TMP145:%.*]] = inttoptr i32 [[TMP144]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP146:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP145]], i32 0 +; POST-PROCESS-NEXT: [[TMP147:%.*]] = load i32, ptr addrspace(21) [[TMP146]], align 4 +; POST-PROCESS-NEXT: [[TMP148:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP149:%.*]] = add i32 [[TMP148]], -120 +; POST-PROCESS-NEXT: [[TMP150:%.*]] = add i32 [[TMP149]], 200 ; 
POST-PROCESS-NEXT: [[TMP151:%.*]] = inttoptr i32 [[TMP150]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP152:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP151]], i32 -30 -; POST-PROCESS-NEXT: [[TMP153:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP152]], i32 0, i32 0, i64 54 -; POST-PROCESS-NEXT: [[TMP154:%.*]] = load i32, ptr addrspace(21) [[TMP153]], align 4 -; POST-PROCESS-NEXT: [[TMP155:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP156:%.*]] = inttoptr i32 [[TMP155]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP157:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP156]], i32 -30 -; POST-PROCESS-NEXT: [[TMP158:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP157]], i32 0, i32 0, i64 55 +; POST-PROCESS-NEXT: [[TMP152:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP151]], i32 0 +; POST-PROCESS-NEXT: [[TMP153:%.*]] = load i32, ptr addrspace(21) [[TMP152]], align 4 +; POST-PROCESS-NEXT: [[TMP154:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP155:%.*]] = add i32 [[TMP154]], -120 +; POST-PROCESS-NEXT: [[TMP156:%.*]] = add i32 [[TMP155]], 204 +; POST-PROCESS-NEXT: [[TMP157:%.*]] = inttoptr i32 [[TMP156]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP158:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP157]], i32 0 ; POST-PROCESS-NEXT: [[TMP159:%.*]] = load i32, ptr addrspace(21) [[TMP158]], align 4 ; POST-PROCESS-NEXT: [[TMP160:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP161:%.*]] = inttoptr i32 [[TMP160]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP162:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP161]], i32 -30 -; POST-PROCESS-NEXT: [[TMP163:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP162]], i32 0, i32 0, i64 56 -; POST-PROCESS-NEXT: [[TMP164:%.*]] = load 
i32, ptr addrspace(21) [[TMP163]], align 4 -; POST-PROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP1]], 0 +; POST-PROCESS-NEXT: [[TMP161:%.*]] = add i32 [[TMP160]], -120 +; POST-PROCESS-NEXT: [[TMP162:%.*]] = add i32 [[TMP161]], 208 +; POST-PROCESS-NEXT: [[TMP163:%.*]] = inttoptr i32 [[TMP162]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP164:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP163]], i32 0 +; POST-PROCESS-NEXT: [[TMP165:%.*]] = load i32, ptr addrspace(21) [[TMP164]], align 4 +; POST-PROCESS-NEXT: [[TMP166:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP167:%.*]] = add i32 [[TMP166]], -120 +; POST-PROCESS-NEXT: [[TMP168:%.*]] = add i32 [[TMP167]], 212 +; POST-PROCESS-NEXT: [[TMP169:%.*]] = inttoptr i32 [[TMP168]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP170:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP169]], i32 0 +; POST-PROCESS-NEXT: [[TMP171:%.*]] = load i32, ptr addrspace(21) [[TMP170]], align 4 +; POST-PROCESS-NEXT: [[TMP172:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP173:%.*]] = add i32 [[TMP172]], -120 +; POST-PROCESS-NEXT: [[TMP174:%.*]] = add i32 [[TMP173]], 216 +; POST-PROCESS-NEXT: [[TMP175:%.*]] = inttoptr i32 [[TMP174]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP176:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP175]], i32 0 +; POST-PROCESS-NEXT: [[TMP177:%.*]] = load i32, ptr addrspace(21) [[TMP176]], align 4 +; POST-PROCESS-NEXT: [[TMP178:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP179:%.*]] = add i32 [[TMP178]], -120 +; POST-PROCESS-NEXT: [[TMP180:%.*]] = add i32 [[TMP179]], 220 +; POST-PROCESS-NEXT: [[TMP181:%.*]] = inttoptr i32 [[TMP180]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP182:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP181]], i32 0 +; POST-PROCESS-NEXT: [[TMP183:%.*]] = load i32, ptr addrspace(21) [[TMP182]], align 4 +; POST-PROCESS-NEXT: 
[[TMP184:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP185:%.*]] = add i32 [[TMP184]], -120 +; POST-PROCESS-NEXT: [[TMP186:%.*]] = add i32 [[TMP185]], 224 +; POST-PROCESS-NEXT: [[TMP187:%.*]] = inttoptr i32 [[TMP186]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP188:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP187]], i32 0 +; POST-PROCESS-NEXT: [[TMP189:%.*]] = load i32, ptr addrspace(21) [[TMP188]], align 4 +; POST-PROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 ; POST-PROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; POST-PROCESS-NEXT: [[DOTRELOAD_ADDR:%.*]] = getelementptr inbounds [[CLOSESTHIT_FRAME:%.*]], ptr addrspace(21) [[TMP6]], i32 0, i32 1 -; POST-PROCESS-NEXT: [[DOTRELOAD:%.*]] = load i32, ptr addrspace(21) [[DOTRELOAD_ADDR]], align 4 -; POST-PROCESS-NEXT: [[RETURNADDR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[CLOSESTHIT_FRAME]], ptr addrspace(21) [[TMP6]], i32 0, i32 0 -; POST-PROCESS-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(21) [[RETURNADDR_RELOAD_ADDR]], align 4 -; POST-PROCESS-NEXT: store i32 [[DOTRELOAD]], ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: store i32 [[TMP7]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP8]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP9]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), 
align 4 -; POST-PROCESS-NEXT: store i32 [[TMP10]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 10) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP11]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 11) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP12]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 12) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP13]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 13) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP14]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 14) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP15]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 15) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP16]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 16) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP17]], ptr addrspace(20) addrspacecast (ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 17) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP18]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 18) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP19]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 19) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP20]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 20) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP21]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 21) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP22]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 22) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP23]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 23) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP24]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), 
i32 0, i32 0, i64 24) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP25]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 25) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP26]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 26) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP27]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 27) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP28]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 28) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP29]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 29) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP165:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP166:%.*]] = inttoptr i32 [[TMP165]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP167:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP166]], i32 -30 -; POST-PROCESS-NEXT: [[TMP168:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP167]], i32 0, i32 0, i64 30 -; POST-PROCESS-NEXT: store i32 [[TMP34]], ptr addrspace(21) [[TMP168]], align 4 -; POST-PROCESS-NEXT: [[TMP169:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; 
POST-PROCESS-NEXT: [[TMP170:%.*]] = inttoptr i32 [[TMP169]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP171:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP170]], i32 -30 -; POST-PROCESS-NEXT: [[TMP172:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP171]], i32 0, i32 0, i64 31 -; POST-PROCESS-NEXT: store i32 [[TMP39]], ptr addrspace(21) [[TMP172]], align 4 -; POST-PROCESS-NEXT: [[TMP173:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP174:%.*]] = inttoptr i32 [[TMP173]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP175:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP174]], i32 -30 -; POST-PROCESS-NEXT: [[TMP176:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP175]], i32 0, i32 0, i64 32 -; POST-PROCESS-NEXT: store i32 [[TMP44]], ptr addrspace(21) [[TMP176]], align 4 -; POST-PROCESS-NEXT: [[TMP177:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP178:%.*]] = inttoptr i32 [[TMP177]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP179:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP178]], i32 -30 -; POST-PROCESS-NEXT: [[TMP180:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP179]], i32 0, i32 0, i64 33 -; POST-PROCESS-NEXT: store i32 [[TMP49]], ptr addrspace(21) [[TMP180]], align 4 -; POST-PROCESS-NEXT: [[TMP181:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP182:%.*]] = inttoptr i32 [[TMP181]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP183:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP182]], i32 -30 -; POST-PROCESS-NEXT: [[TMP184:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP183]], i32 0, i32 0, i64 34 -; POST-PROCESS-NEXT: store i32 [[TMP54]], ptr addrspace(21) [[TMP184]], align 4 -; POST-PROCESS-NEXT: 
[[TMP185:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP186:%.*]] = inttoptr i32 [[TMP185]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP187:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP186]], i32 -30 -; POST-PROCESS-NEXT: [[TMP188:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP187]], i32 0, i32 0, i64 35 -; POST-PROCESS-NEXT: store i32 [[TMP59]], ptr addrspace(21) [[TMP188]], align 4 -; POST-PROCESS-NEXT: [[TMP189:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP190:%.*]] = inttoptr i32 [[TMP189]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP191:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP190]], i32 -30 -; POST-PROCESS-NEXT: [[TMP192:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP191]], i32 0, i32 0, i64 36 -; POST-PROCESS-NEXT: store i32 [[TMP64]], ptr addrspace(21) [[TMP192]], align 4 -; POST-PROCESS-NEXT: [[TMP193:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP190:%.*]] = add i32 [[TMP4]], 116 +; POST-PROCESS-NEXT: [[TMP191:%.*]] = inttoptr i32 [[TMP190]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP192:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP191]], i32 0 +; POST-PROCESS-NEXT: [[DOTRELOAD:%.*]] = load i32, ptr addrspace(21) [[TMP192]], align 4 +; POST-PROCESS-NEXT: [[TMP193:%.*]] = add i32 [[TMP4]], 108 ; POST-PROCESS-NEXT: [[TMP194:%.*]] = inttoptr i32 [[TMP193]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP195:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP194]], i32 -30 -; POST-PROCESS-NEXT: [[TMP196:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP195]], i32 0, i32 0, i64 37 -; POST-PROCESS-NEXT: store i32 [[TMP69]], ptr addrspace(21) [[TMP196]], align 4 -; POST-PROCESS-NEXT: [[TMP197:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 
-; POST-PROCESS-NEXT: [[TMP198:%.*]] = inttoptr i32 [[TMP197]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP199:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP198]], i32 -30 -; POST-PROCESS-NEXT: [[TMP200:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP199]], i32 0, i32 0, i64 38 -; POST-PROCESS-NEXT: store i32 [[TMP74]], ptr addrspace(21) [[TMP200]], align 4 +; POST-PROCESS-NEXT: [[TMP195:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP194]], i32 0 +; POST-PROCESS-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(21) [[TMP195]], align 4 +; POST-PROCESS-NEXT: store i32 [[DOTRELOAD]], ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: store i32 [[TMP5]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP6]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP7]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP8]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 10) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP9]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, 
i32 0, i64 11) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP10]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 12) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP11]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 13) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP12]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 14) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP13]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 15) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP14]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 16) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP15]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 17) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP16]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 18) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP17]], ptr addrspace(20) addrspacecast 
(ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 19) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP18]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 20) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP19]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 21) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP20]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 22) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP21]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 23) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP22]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 24) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP23]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 25) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP24]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) 
@REGISTERS to ptr), i32 0, i32 0, i64 26) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP25]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 27) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP26]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 28) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP27]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 29) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: [[TMP196:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP197:%.*]] = add i32 [[TMP196]], -120 +; POST-PROCESS-NEXT: [[TMP198:%.*]] = add i32 [[TMP197]], 120 +; POST-PROCESS-NEXT: [[TMP199:%.*]] = inttoptr i32 [[TMP198]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP200:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP199]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP33]], ptr addrspace(21) [[TMP200]], align 4 ; POST-PROCESS-NEXT: [[TMP201:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP202:%.*]] = inttoptr i32 [[TMP201]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP203:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP202]], i32 -30 -; POST-PROCESS-NEXT: [[TMP204:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP203]], i32 0, i32 0, i64 39 -; POST-PROCESS-NEXT: store i32 [[TMP79]], ptr addrspace(21) [[TMP204]], align 4 -; POST-PROCESS-NEXT: [[TMP205:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP206:%.*]] = inttoptr 
i32 [[TMP205]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP207:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP206]], i32 -30 -; POST-PROCESS-NEXT: [[TMP208:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP207]], i32 0, i32 0, i64 40 -; POST-PROCESS-NEXT: store i32 [[TMP84]], ptr addrspace(21) [[TMP208]], align 4 -; POST-PROCESS-NEXT: [[TMP209:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP210:%.*]] = inttoptr i32 [[TMP209]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP211:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP210]], i32 -30 -; POST-PROCESS-NEXT: [[TMP212:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP211]], i32 0, i32 0, i64 41 -; POST-PROCESS-NEXT: store i32 [[TMP89]], ptr addrspace(21) [[TMP212]], align 4 -; POST-PROCESS-NEXT: [[TMP213:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP202:%.*]] = add i32 [[TMP201]], -120 +; POST-PROCESS-NEXT: [[TMP203:%.*]] = add i32 [[TMP202]], 124 +; POST-PROCESS-NEXT: [[TMP204:%.*]] = inttoptr i32 [[TMP203]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP205:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP204]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP39]], ptr addrspace(21) [[TMP205]], align 4 +; POST-PROCESS-NEXT: [[TMP206:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP207:%.*]] = add i32 [[TMP206]], -120 +; POST-PROCESS-NEXT: [[TMP208:%.*]] = add i32 [[TMP207]], 128 +; POST-PROCESS-NEXT: [[TMP209:%.*]] = inttoptr i32 [[TMP208]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP210:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP209]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP45]], ptr addrspace(21) [[TMP210]], align 4 +; POST-PROCESS-NEXT: [[TMP211:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP212:%.*]] = add i32 [[TMP211]], -120 +; 
POST-PROCESS-NEXT: [[TMP213:%.*]] = add i32 [[TMP212]], 132 ; POST-PROCESS-NEXT: [[TMP214:%.*]] = inttoptr i32 [[TMP213]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP215:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP214]], i32 -30 -; POST-PROCESS-NEXT: [[TMP216:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP215]], i32 0, i32 0, i64 42 -; POST-PROCESS-NEXT: store i32 [[TMP94]], ptr addrspace(21) [[TMP216]], align 4 -; POST-PROCESS-NEXT: [[TMP217:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP218:%.*]] = inttoptr i32 [[TMP217]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP219:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP218]], i32 -30 -; POST-PROCESS-NEXT: [[TMP220:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP219]], i32 0, i32 0, i64 43 -; POST-PROCESS-NEXT: store i32 [[TMP99]], ptr addrspace(21) [[TMP220]], align 4 +; POST-PROCESS-NEXT: [[TMP215:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP214]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP51]], ptr addrspace(21) [[TMP215]], align 4 +; POST-PROCESS-NEXT: [[TMP216:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP217:%.*]] = add i32 [[TMP216]], -120 +; POST-PROCESS-NEXT: [[TMP218:%.*]] = add i32 [[TMP217]], 136 +; POST-PROCESS-NEXT: [[TMP219:%.*]] = inttoptr i32 [[TMP218]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP220:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP219]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP57]], ptr addrspace(21) [[TMP220]], align 4 ; POST-PROCESS-NEXT: [[TMP221:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP222:%.*]] = inttoptr i32 [[TMP221]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP223:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP222]], i32 -30 -; POST-PROCESS-NEXT: [[TMP224:%.*]] = getelementptr 
[[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP223]], i32 0, i32 0, i64 44 -; POST-PROCESS-NEXT: store i32 [[TMP104]], ptr addrspace(21) [[TMP224]], align 4 -; POST-PROCESS-NEXT: [[TMP225:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP226:%.*]] = inttoptr i32 [[TMP225]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP227:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP226]], i32 -30 -; POST-PROCESS-NEXT: [[TMP228:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP227]], i32 0, i32 0, i64 45 -; POST-PROCESS-NEXT: store i32 [[TMP109]], ptr addrspace(21) [[TMP228]], align 4 -; POST-PROCESS-NEXT: [[TMP229:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP230:%.*]] = inttoptr i32 [[TMP229]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP231:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP230]], i32 -30 -; POST-PROCESS-NEXT: [[TMP232:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP231]], i32 0, i32 0, i64 46 -; POST-PROCESS-NEXT: store i32 [[TMP114]], ptr addrspace(21) [[TMP232]], align 4 -; POST-PROCESS-NEXT: [[TMP233:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP222:%.*]] = add i32 [[TMP221]], -120 +; POST-PROCESS-NEXT: [[TMP223:%.*]] = add i32 [[TMP222]], 140 +; POST-PROCESS-NEXT: [[TMP224:%.*]] = inttoptr i32 [[TMP223]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP225:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP224]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP63]], ptr addrspace(21) [[TMP225]], align 4 +; POST-PROCESS-NEXT: [[TMP226:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP227:%.*]] = add i32 [[TMP226]], -120 +; POST-PROCESS-NEXT: [[TMP228:%.*]] = add i32 [[TMP227]], 144 +; POST-PROCESS-NEXT: [[TMP229:%.*]] = inttoptr i32 [[TMP228]] to ptr addrspace(21) 
+; POST-PROCESS-NEXT: [[TMP230:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP229]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP69]], ptr addrspace(21) [[TMP230]], align 4 +; POST-PROCESS-NEXT: [[TMP231:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP232:%.*]] = add i32 [[TMP231]], -120 +; POST-PROCESS-NEXT: [[TMP233:%.*]] = add i32 [[TMP232]], 148 ; POST-PROCESS-NEXT: [[TMP234:%.*]] = inttoptr i32 [[TMP233]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP235:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP234]], i32 -30 -; POST-PROCESS-NEXT: [[TMP236:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP235]], i32 0, i32 0, i64 47 -; POST-PROCESS-NEXT: store i32 [[TMP119]], ptr addrspace(21) [[TMP236]], align 4 -; POST-PROCESS-NEXT: [[TMP237:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP238:%.*]] = inttoptr i32 [[TMP237]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP239:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP238]], i32 -30 -; POST-PROCESS-NEXT: [[TMP240:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP239]], i32 0, i32 0, i64 48 -; POST-PROCESS-NEXT: store i32 [[TMP124]], ptr addrspace(21) [[TMP240]], align 4 +; POST-PROCESS-NEXT: [[TMP235:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP234]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP75]], ptr addrspace(21) [[TMP235]], align 4 +; POST-PROCESS-NEXT: [[TMP236:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP237:%.*]] = add i32 [[TMP236]], -120 +; POST-PROCESS-NEXT: [[TMP238:%.*]] = add i32 [[TMP237]], 152 +; POST-PROCESS-NEXT: [[TMP239:%.*]] = inttoptr i32 [[TMP238]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP240:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP239]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP81]], ptr addrspace(21) [[TMP240]], align 4 ; POST-PROCESS-NEXT: 
[[TMP241:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP242:%.*]] = inttoptr i32 [[TMP241]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP243:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP242]], i32 -30 -; POST-PROCESS-NEXT: [[TMP244:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP243]], i32 0, i32 0, i64 49 -; POST-PROCESS-NEXT: store i32 [[TMP129]], ptr addrspace(21) [[TMP244]], align 4 -; POST-PROCESS-NEXT: [[TMP245:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP246:%.*]] = inttoptr i32 [[TMP245]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP247:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP246]], i32 -30 -; POST-PROCESS-NEXT: [[TMP248:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP247]], i32 0, i32 0, i64 50 -; POST-PROCESS-NEXT: store i32 [[TMP134]], ptr addrspace(21) [[TMP248]], align 4 -; POST-PROCESS-NEXT: [[TMP249:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP250:%.*]] = inttoptr i32 [[TMP249]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP251:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP250]], i32 -30 -; POST-PROCESS-NEXT: [[TMP252:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP251]], i32 0, i32 0, i64 51 -; POST-PROCESS-NEXT: store i32 [[TMP139]], ptr addrspace(21) [[TMP252]], align 4 -; POST-PROCESS-NEXT: [[TMP253:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP242:%.*]] = add i32 [[TMP241]], -120 +; POST-PROCESS-NEXT: [[TMP243:%.*]] = add i32 [[TMP242]], 156 +; POST-PROCESS-NEXT: [[TMP244:%.*]] = inttoptr i32 [[TMP243]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP245:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP244]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP87]], ptr addrspace(21) [[TMP245]], align 4 
+; POST-PROCESS-NEXT: [[TMP246:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP247:%.*]] = add i32 [[TMP246]], -120 +; POST-PROCESS-NEXT: [[TMP248:%.*]] = add i32 [[TMP247]], 160 +; POST-PROCESS-NEXT: [[TMP249:%.*]] = inttoptr i32 [[TMP248]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP250:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP249]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP93]], ptr addrspace(21) [[TMP250]], align 4 +; POST-PROCESS-NEXT: [[TMP251:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP252:%.*]] = add i32 [[TMP251]], -120 +; POST-PROCESS-NEXT: [[TMP253:%.*]] = add i32 [[TMP252]], 164 ; POST-PROCESS-NEXT: [[TMP254:%.*]] = inttoptr i32 [[TMP253]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP255:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP254]], i32 -30 -; POST-PROCESS-NEXT: [[TMP256:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP255]], i32 0, i32 0, i64 52 -; POST-PROCESS-NEXT: store i32 [[TMP144]], ptr addrspace(21) [[TMP256]], align 4 -; POST-PROCESS-NEXT: [[TMP257:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP258:%.*]] = inttoptr i32 [[TMP257]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP259:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP258]], i32 -30 -; POST-PROCESS-NEXT: [[TMP260:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP259]], i32 0, i32 0, i64 53 -; POST-PROCESS-NEXT: store i32 [[TMP149]], ptr addrspace(21) [[TMP260]], align 4 +; POST-PROCESS-NEXT: [[TMP255:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP254]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP99]], ptr addrspace(21) [[TMP255]], align 4 +; POST-PROCESS-NEXT: [[TMP256:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP257:%.*]] = add i32 [[TMP256]], -120 +; POST-PROCESS-NEXT: [[TMP258:%.*]] = add 
i32 [[TMP257]], 168 +; POST-PROCESS-NEXT: [[TMP259:%.*]] = inttoptr i32 [[TMP258]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP260:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP259]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP105]], ptr addrspace(21) [[TMP260]], align 4 ; POST-PROCESS-NEXT: [[TMP261:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP262:%.*]] = inttoptr i32 [[TMP261]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP263:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP262]], i32 -30 -; POST-PROCESS-NEXT: [[TMP264:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP263]], i32 0, i32 0, i64 54 -; POST-PROCESS-NEXT: store i32 [[TMP154]], ptr addrspace(21) [[TMP264]], align 4 -; POST-PROCESS-NEXT: [[TMP265:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP266:%.*]] = inttoptr i32 [[TMP265]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP267:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP266]], i32 -30 -; POST-PROCESS-NEXT: [[TMP268:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP267]], i32 0, i32 0, i64 55 -; POST-PROCESS-NEXT: store i32 [[TMP159]], ptr addrspace(21) [[TMP268]], align 4 -; POST-PROCESS-NEXT: [[TMP269:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP270:%.*]] = inttoptr i32 [[TMP269]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP271:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP270]], i32 -30 -; POST-PROCESS-NEXT: [[TMP272:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP271]], i32 0, i32 0, i64 56 -; POST-PROCESS-NEXT: store i32 [[TMP164]], ptr addrspace(21) [[TMP272]], align 4 -; POST-PROCESS-NEXT: [[TMP273:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: [[TMP274:%.*]] = add i32 [[TMP273]], -108 -; POST-PROCESS-NEXT: store i32 
[[TMP274]], ptr [[CSP]], align 4 +; POST-PROCESS-NEXT: [[TMP262:%.*]] = add i32 [[TMP261]], -120 +; POST-PROCESS-NEXT: [[TMP263:%.*]] = add i32 [[TMP262]], 172 +; POST-PROCESS-NEXT: [[TMP264:%.*]] = inttoptr i32 [[TMP263]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP265:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP264]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP111]], ptr addrspace(21) [[TMP265]], align 4 +; POST-PROCESS-NEXT: [[TMP266:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP267:%.*]] = add i32 [[TMP266]], -120 +; POST-PROCESS-NEXT: [[TMP268:%.*]] = add i32 [[TMP267]], 176 +; POST-PROCESS-NEXT: [[TMP269:%.*]] = inttoptr i32 [[TMP268]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP270:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP269]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP117]], ptr addrspace(21) [[TMP270]], align 4 +; POST-PROCESS-NEXT: [[TMP271:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP272:%.*]] = add i32 [[TMP271]], -120 +; POST-PROCESS-NEXT: [[TMP273:%.*]] = add i32 [[TMP272]], 180 +; POST-PROCESS-NEXT: [[TMP274:%.*]] = inttoptr i32 [[TMP273]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP275:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP274]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP123]], ptr addrspace(21) [[TMP275]], align 4 +; POST-PROCESS-NEXT: [[TMP276:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP277:%.*]] = add i32 [[TMP276]], -120 +; POST-PROCESS-NEXT: [[TMP278:%.*]] = add i32 [[TMP277]], 184 +; POST-PROCESS-NEXT: [[TMP279:%.*]] = inttoptr i32 [[TMP278]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP280:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP279]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP129]], ptr addrspace(21) [[TMP280]], align 4 +; POST-PROCESS-NEXT: [[TMP281:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP282:%.*]] = add i32 [[TMP281]], -120 +; 
POST-PROCESS-NEXT: [[TMP283:%.*]] = add i32 [[TMP282]], 188 +; POST-PROCESS-NEXT: [[TMP284:%.*]] = inttoptr i32 [[TMP283]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP285:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP284]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP135]], ptr addrspace(21) [[TMP285]], align 4 +; POST-PROCESS-NEXT: [[TMP286:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP287:%.*]] = add i32 [[TMP286]], -120 +; POST-PROCESS-NEXT: [[TMP288:%.*]] = add i32 [[TMP287]], 192 +; POST-PROCESS-NEXT: [[TMP289:%.*]] = inttoptr i32 [[TMP288]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP290:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP289]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP141]], ptr addrspace(21) [[TMP290]], align 4 +; POST-PROCESS-NEXT: [[TMP291:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP292:%.*]] = add i32 [[TMP291]], -120 +; POST-PROCESS-NEXT: [[TMP293:%.*]] = add i32 [[TMP292]], 196 +; POST-PROCESS-NEXT: [[TMP294:%.*]] = inttoptr i32 [[TMP293]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP295:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP294]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP147]], ptr addrspace(21) [[TMP295]], align 4 +; POST-PROCESS-NEXT: [[TMP296:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP297:%.*]] = add i32 [[TMP296]], -120 +; POST-PROCESS-NEXT: [[TMP298:%.*]] = add i32 [[TMP297]], 200 +; POST-PROCESS-NEXT: [[TMP299:%.*]] = inttoptr i32 [[TMP298]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP300:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP299]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP153]], ptr addrspace(21) [[TMP300]], align 4 +; POST-PROCESS-NEXT: [[TMP301:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP302:%.*]] = add i32 [[TMP301]], -120 +; POST-PROCESS-NEXT: [[TMP303:%.*]] = add i32 [[TMP302]], 204 +; POST-PROCESS-NEXT: [[TMP304:%.*]] = inttoptr 
i32 [[TMP303]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP305:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP304]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP159]], ptr addrspace(21) [[TMP305]], align 4 +; POST-PROCESS-NEXT: [[TMP306:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP307:%.*]] = add i32 [[TMP306]], -120 +; POST-PROCESS-NEXT: [[TMP308:%.*]] = add i32 [[TMP307]], 208 +; POST-PROCESS-NEXT: [[TMP309:%.*]] = inttoptr i32 [[TMP308]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP310:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP309]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP165]], ptr addrspace(21) [[TMP310]], align 4 +; POST-PROCESS-NEXT: [[TMP311:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP312:%.*]] = add i32 [[TMP311]], -120 +; POST-PROCESS-NEXT: [[TMP313:%.*]] = add i32 [[TMP312]], 212 +; POST-PROCESS-NEXT: [[TMP314:%.*]] = inttoptr i32 [[TMP313]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP315:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP314]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP171]], ptr addrspace(21) [[TMP315]], align 4 +; POST-PROCESS-NEXT: [[TMP316:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP317:%.*]] = add i32 [[TMP316]], -120 +; POST-PROCESS-NEXT: [[TMP318:%.*]] = add i32 [[TMP317]], 216 +; POST-PROCESS-NEXT: [[TMP319:%.*]] = inttoptr i32 [[TMP318]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP320:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP319]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP177]], ptr addrspace(21) [[TMP320]], align 4 +; POST-PROCESS-NEXT: [[TMP321:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP322:%.*]] = add i32 [[TMP321]], -120 +; POST-PROCESS-NEXT: [[TMP323:%.*]] = add i32 [[TMP322]], 220 +; POST-PROCESS-NEXT: [[TMP324:%.*]] = inttoptr i32 [[TMP323]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP325:%.*]] = getelementptr i8, ptr 
addrspace(21) [[TMP324]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP183]], ptr addrspace(21) [[TMP325]], align 4 +; POST-PROCESS-NEXT: [[TMP326:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP327:%.*]] = add i32 [[TMP326]], -120 +; POST-PROCESS-NEXT: [[TMP328:%.*]] = add i32 [[TMP327]], 224 +; POST-PROCESS-NEXT: [[TMP329:%.*]] = inttoptr i32 [[TMP328]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP330:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP329]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP189]], ptr addrspace(21) [[TMP330]], align 4 ; POST-PROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT]], 0 -; POST-PROCESS-NEXT: [[TMP275:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP275]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META17]] +; POST-PROCESS-NEXT: [[TMP331:%.*]] = load i32, ptr [[CSP]], align 4 +; POST-PROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP331]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META17]] ; POST-PROCESS-NEXT: unreachable ; ; @@ -2315,25 +2501,24 @@ attributes #3 = { nounwind } ; POST-PROCESS-GLOBAL-SAME: ) !lgc.rt.shaderstage [[META8:![0-9]+]] !continuation.entry [[META19:![0-9]+]] !continuation.registercount [[META8]] !continuation [[META20:![0-9]+]] !continuation.stacksize [[META21:![0-9]+]] !continuation.state [[META8]] { ; POST-PROCESS-GLOBAL-NEXT: AllocaSpillBB: ; POST-PROCESS-GLOBAL-NEXT: [[CSP:%.*]] = alloca i32, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[SYSTEM_DATA:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA:%.*]] @_cont_SetupRayGen() ; POST-PROCESS-GLOBAL-NEXT: [[TMP0:%.*]] = call i32 @_cont_GetContinuationStackAddr() ; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP0]], ptr [[CSP]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP1:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() +; POST-PROCESS-GLOBAL-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(22) +; POST-PROCESS-GLOBAL-NEXT: [[SYSTEM_DATA:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA:%.*]] @_cont_SetupRayGen() +; POST-PROCESS-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr [[CSP]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 0 ; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[SYSTEM_DATA]], 0 -; POST-PROCESS-GLOBAL-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 108 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP2]], ptr [[CSP]], align 4 ; POST-PROCESS-GLOBAL-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) -; POST-PROCESS-GLOBAL-NEXT: [[TMP3:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP4:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 -; 
POST-PROCESS-GLOBAL-NEXT: [[TMP5:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP3]]) -; POST-PROCESS-GLOBAL-NEXT: [[TMP6:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP5]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) -; POST-PROCESS-GLOBAL-NEXT: [[TMP7:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP6]]) +; POST-PROCESS-GLOBAL-NEXT: [[TMP5:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP6:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP7:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP5]]) +; POST-PROCESS-GLOBAL-NEXT: [[TMP8:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP7]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; POST-PROCESS-GLOBAL-NEXT: [[TMP9:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP8]]) ; POST-PROCESS-GLOBAL-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT]], 0 ; POST-PROCESS-GLOBAL-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]], 0 ; POST-PROCESS-GLOBAL-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 -; POST-PROCESS-GLOBAL-NEXT: [[TMP8:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], -108 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP9]], ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP4]], ptr addrspace(20) @REGISTERS, align 4 ; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) addrspacecast 
(ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 ; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 ; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 @@ -2358,423 +2543,323 @@ attributes #3 = { nounwind } ; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 28) to ptr addrspace(20)), align 4 ; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 29) to ptr addrspace(20)), align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP11:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP12]], i32 [[TMP10]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP13]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP15:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(22) [[TMP14]], i32 0, i32 0, i64 30 -; POST-PROCESS-GLOBAL-NEXT: 
store i32 undef, ptr addrspace(22) [[TMP15]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP17:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP18]], i32 [[TMP16]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP19]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP21:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(22) [[TMP20]], i32 0, i32 0, i64 31 +; POST-PROCESS-GLOBAL-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP12]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP13]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP16:%.*]] = add i32 [[TMP15]], 124 +; POST-PROCESS-GLOBAL-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP16]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP17]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP19:%.*]] = add i32 [[TMP18]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP20:%.*]] = add i32 [[TMP19]], 128 +; POST-PROCESS-GLOBAL-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP20]] ; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP21]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: 
[[TMP23:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP23]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP24]], i32 [[TMP22]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP25]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP27:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(22) [[TMP26]], i32 0, i32 0, i64 32 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP27]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP28:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP29:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP30:%.*]] = inttoptr i64 [[TMP29]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP31:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP30]], i32 [[TMP28]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP32:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP31]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP33:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(22) [[TMP32]], i32 0, i32 0, i64 33 +; POST-PROCESS-GLOBAL-NEXT: [[TMP23:%.*]] = add i32 [[TMP22]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP24:%.*]] = add i32 [[TMP23]], 132 +; POST-PROCESS-GLOBAL-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP24]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP25]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP27:%.*]] = add i32 [[TMP26]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP28:%.*]] = add i32 [[TMP27]], 136 +; POST-PROCESS-GLOBAL-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP28]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) 
[[TMP29]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP30:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP31:%.*]] = add i32 [[TMP30]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP32:%.*]] = add i32 [[TMP31]], 140 +; POST-PROCESS-GLOBAL-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP32]] ; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP33]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP34:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP35:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP36:%.*]] = inttoptr i64 [[TMP35]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP36]], i32 [[TMP34]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP38:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP37]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP39:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(22) [[TMP38]], i32 0, i32 0, i64 34 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP39]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP40:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP41:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP42:%.*]] = inttoptr i64 [[TMP41]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP42]], i32 [[TMP40]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP44:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP43]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP45:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(22) [[TMP44]], i32 0, i32 0, i64 35 +; POST-PROCESS-GLOBAL-NEXT: [[TMP35:%.*]] = add i32 [[TMP34]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP36:%.*]] = add i32 [[TMP35]], 144 +; POST-PROCESS-GLOBAL-NEXT: 
[[TMP37:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP36]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP37]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP38:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP39:%.*]] = add i32 [[TMP38]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP40:%.*]] = add i32 [[TMP39]], 148 +; POST-PROCESS-GLOBAL-NEXT: [[TMP41:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP40]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP41]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP42:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP43:%.*]] = add i32 [[TMP42]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP44:%.*]] = add i32 [[TMP43]], 152 +; POST-PROCESS-GLOBAL-NEXT: [[TMP45:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP44]] ; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP45]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP46:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP47:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP48:%.*]] = inttoptr i64 [[TMP47]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP49:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP48]], i32 [[TMP46]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP50:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP49]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP51:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(22) [[TMP50]], i32 0, i32 0, i64 36 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP51]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP52:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP53:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP54:%.*]] = inttoptr i64 [[TMP53]] to ptr 
addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP55:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP54]], i32 [[TMP52]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP56:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP55]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP57:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(22) [[TMP56]], i32 0, i32 0, i64 37 +; POST-PROCESS-GLOBAL-NEXT: [[TMP47:%.*]] = add i32 [[TMP46]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP48:%.*]] = add i32 [[TMP47]], 156 +; POST-PROCESS-GLOBAL-NEXT: [[TMP49:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP48]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP49]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP50:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP51:%.*]] = add i32 [[TMP50]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP52:%.*]] = add i32 [[TMP51]], 160 +; POST-PROCESS-GLOBAL-NEXT: [[TMP53:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP52]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP53]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP54:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP55:%.*]] = add i32 [[TMP54]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP56:%.*]] = add i32 [[TMP55]], 164 +; POST-PROCESS-GLOBAL-NEXT: [[TMP57:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP56]] ; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP57]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP58:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP59:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP60:%.*]] = inttoptr i64 [[TMP59]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP61:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP60]], i32 [[TMP58]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP62:%.*]] = getelementptr i32, 
ptr addrspace(22) [[TMP61]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP63:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(22) [[TMP62]], i32 0, i32 0, i64 38 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP63]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP64:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP65:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP66:%.*]] = inttoptr i64 [[TMP65]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP67:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP66]], i32 [[TMP64]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP68:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP67]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP69:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(22) [[TMP68]], i32 0, i32 0, i64 39 +; POST-PROCESS-GLOBAL-NEXT: [[TMP59:%.*]] = add i32 [[TMP58]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP60:%.*]] = add i32 [[TMP59]], 168 +; POST-PROCESS-GLOBAL-NEXT: [[TMP61:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP60]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP61]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP62:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP63:%.*]] = add i32 [[TMP62]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP64:%.*]] = add i32 [[TMP63]], 172 +; POST-PROCESS-GLOBAL-NEXT: [[TMP65:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP64]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP65]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP66:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP67:%.*]] = add i32 [[TMP66]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP68:%.*]] = add i32 [[TMP67]], 176 +; POST-PROCESS-GLOBAL-NEXT: [[TMP69:%.*]] = getelementptr i8, ptr addrspace(22) 
[[TMP2]], i32 [[TMP68]] ; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP69]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP70:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP71:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP72:%.*]] = inttoptr i64 [[TMP71]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP73:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP72]], i32 [[TMP70]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP74:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP73]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP75:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(22) [[TMP74]], i32 0, i32 0, i64 40 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP75]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP76:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP77:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP78:%.*]] = inttoptr i64 [[TMP77]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP79:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP78]], i32 [[TMP76]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP80:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP79]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP81:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(22) [[TMP80]], i32 0, i32 0, i64 41 +; POST-PROCESS-GLOBAL-NEXT: [[TMP71:%.*]] = add i32 [[TMP70]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP72:%.*]] = add i32 [[TMP71]], 180 +; POST-PROCESS-GLOBAL-NEXT: [[TMP73:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP72]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP73]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP74:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP75:%.*]] = add i32 [[TMP74]], -120 +; 
POST-PROCESS-GLOBAL-NEXT: [[TMP76:%.*]] = add i32 [[TMP75]], 184 +; POST-PROCESS-GLOBAL-NEXT: [[TMP77:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP76]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP77]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP78:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP79:%.*]] = add i32 [[TMP78]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP80:%.*]] = add i32 [[TMP79]], 188 +; POST-PROCESS-GLOBAL-NEXT: [[TMP81:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP80]] ; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP81]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP82:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP83:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP84:%.*]] = inttoptr i64 [[TMP83]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP85:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP84]], i32 [[TMP82]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP86:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP85]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP87:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(22) [[TMP86]], i32 0, i32 0, i64 42 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP87]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP88:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP89:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP90:%.*]] = inttoptr i64 [[TMP89]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP91:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP90]], i32 [[TMP88]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP92:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP91]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP93:%.*]] = getelementptr 
[[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(22) [[TMP92]], i32 0, i32 0, i64 43 +; POST-PROCESS-GLOBAL-NEXT: [[TMP83:%.*]] = add i32 [[TMP82]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP84:%.*]] = add i32 [[TMP83]], 192 +; POST-PROCESS-GLOBAL-NEXT: [[TMP85:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP84]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP85]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP86:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP87:%.*]] = add i32 [[TMP86]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP88:%.*]] = add i32 [[TMP87]], 196 +; POST-PROCESS-GLOBAL-NEXT: [[TMP89:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP88]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP89]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP90:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP91:%.*]] = add i32 [[TMP90]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP92:%.*]] = add i32 [[TMP91]], 200 +; POST-PROCESS-GLOBAL-NEXT: [[TMP93:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP92]] ; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP93]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP94:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP95:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP96:%.*]] = inttoptr i64 [[TMP95]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP97:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP96]], i32 [[TMP94]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP98:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP97]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP99:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(22) [[TMP98]], i32 0, i32 0, i64 44 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP99]], 
align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP100:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP101:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP102:%.*]] = inttoptr i64 [[TMP101]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP103:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP102]], i32 [[TMP100]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP104:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP103]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP105:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(22) [[TMP104]], i32 0, i32 0, i64 45 +; POST-PROCESS-GLOBAL-NEXT: [[TMP95:%.*]] = add i32 [[TMP94]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP96:%.*]] = add i32 [[TMP95]], 204 +; POST-PROCESS-GLOBAL-NEXT: [[TMP97:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP96]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP97]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP98:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP99:%.*]] = add i32 [[TMP98]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP100:%.*]] = add i32 [[TMP99]], 208 +; POST-PROCESS-GLOBAL-NEXT: [[TMP101:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP100]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP101]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP102:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP103:%.*]] = add i32 [[TMP102]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP104:%.*]] = add i32 [[TMP103]], 212 +; POST-PROCESS-GLOBAL-NEXT: [[TMP105:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP104]] ; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP105]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP106:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP107:%.*]] = call i64 
@_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP108:%.*]] = inttoptr i64 [[TMP107]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP109:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP108]], i32 [[TMP106]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP110:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP109]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP111:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(22) [[TMP110]], i32 0, i32 0, i64 46 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP111]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP112:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP113:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP114:%.*]] = inttoptr i64 [[TMP113]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP115:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP114]], i32 [[TMP112]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP116:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP115]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP117:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(22) [[TMP116]], i32 0, i32 0, i64 47 +; POST-PROCESS-GLOBAL-NEXT: [[TMP107:%.*]] = add i32 [[TMP106]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP108:%.*]] = add i32 [[TMP107]], 216 +; POST-PROCESS-GLOBAL-NEXT: [[TMP109:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP108]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP109]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP110:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP111:%.*]] = add i32 [[TMP110]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP112:%.*]] = add i32 [[TMP111]], 220 +; POST-PROCESS-GLOBAL-NEXT: [[TMP113:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP112]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr 
addrspace(22) [[TMP113]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP114:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP115:%.*]] = add i32 [[TMP114]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP116:%.*]] = add i32 [[TMP115]], 224 +; POST-PROCESS-GLOBAL-NEXT: [[TMP117:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP116]] ; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP117]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP118:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP119:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP120:%.*]] = inttoptr i64 [[TMP119]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP121:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP120]], i32 [[TMP118]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP122:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP121]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP123:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(22) [[TMP122]], i32 0, i32 0, i64 48 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP123]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP124:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP125:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP126:%.*]] = inttoptr i64 [[TMP125]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP127:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP126]], i32 [[TMP124]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP128:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP127]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP129:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(22) [[TMP128]], i32 0, i32 0, i64 49 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP129]], align 4 -; POST-PROCESS-GLOBAL-NEXT: 
[[TMP130:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP131:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP132:%.*]] = inttoptr i64 [[TMP131]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP133:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP132]], i32 [[TMP130]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP134:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP133]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP135:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(22) [[TMP134]], i32 0, i32 0, i64 50 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP135]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP136:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP137:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP138:%.*]] = inttoptr i64 [[TMP137]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP139:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP138]], i32 [[TMP136]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP140:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP139]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP141:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(22) [[TMP140]], i32 0, i32 0, i64 51 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP141]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP142:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP143:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP144:%.*]] = inttoptr i64 [[TMP143]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP145:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP144]], i32 [[TMP142]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP146:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP145]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: 
[[TMP147:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(22) [[TMP146]], i32 0, i32 0, i64 52 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP147]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP148:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP149:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP150:%.*]] = inttoptr i64 [[TMP149]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP151:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP150]], i32 [[TMP148]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP152:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP151]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP153:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(22) [[TMP152]], i32 0, i32 0, i64 53 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP153]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP154:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP155:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP156:%.*]] = inttoptr i64 [[TMP155]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP157:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP156]], i32 [[TMP154]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP158:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP157]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP159:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(22) [[TMP158]], i32 0, i32 0, i64 54 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP159]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP160:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP161:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP162:%.*]] = inttoptr i64 [[TMP161]] to ptr 
addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP163:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP162]], i32 [[TMP160]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP164:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP163]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP165:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(22) [[TMP164]], i32 0, i32 0, i64 55 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP165]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP166:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP167:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP168:%.*]] = inttoptr i64 [[TMP167]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP169:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP168]], i32 [[TMP166]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP170:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP169]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP171:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(22) [[TMP170]], i32 0, i32 0, i64 56 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP171]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP172:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP173:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @main.resume.0 to i64)) -; POST-PROCESS-GLOBAL-NEXT: call void (i64, ...) 
@continuation.continue(i64 4, i32 [[TMP172]], i64 [[TMP173]], [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]]), !continuation.registercount [[META18:![0-9]+]], !continuation.returnedRegistercount !18 +; POST-PROCESS-GLOBAL-NEXT: [[TMP118:%.*]] = load i32, ptr [[CSP]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP119:%.*]] = add i32 [[TMP118]], 108 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP119]], ptr [[CSP]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP120:%.*]] = load i32, ptr [[CSP]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP121:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @main.resume.0 to i64)) +; POST-PROCESS-GLOBAL-NEXT: call void (i64, ...) @continuation.continue(i64 4, i32 [[TMP120]], i64 [[TMP121]], [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]]), !continuation.registercount [[META18:![0-9]+]], !continuation.returnedRegistercount !18 ; POST-PROCESS-GLOBAL-NEXT: unreachable ; ; ; POST-PROCESS-GLOBAL-LABEL: define dso_local void @main.resume.0( -; POST-PROCESS-GLOBAL-SAME: i32 [[TMP0:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP1:%.*]]) !lgc.rt.shaderstage [[META8]] !continuation.registercount [[META18]] !continuation [[META20]] { +; POST-PROCESS-GLOBAL-SAME: i32 [[CSPINIT:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META8]] !continuation.registercount [[META18]] !continuation [[META20]] { ; POST-PROCESS-GLOBAL-NEXT: entryresume.0: ; POST-PROCESS-GLOBAL-NEXT: [[CSP:%.*]] = alloca i32, align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP0]], ptr [[CSP]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr 
addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 10) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 11) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 12) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 13) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 14) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to 
ptr), i32 0, i32 0, i64 15) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 16) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 17) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 18) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 19) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 20) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 21) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 22) to ptr 
addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 23) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 24) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 25) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 26) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 27) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 28) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 29) to ptr addrspace(20)), align 4 -; 
POST-PROCESS-GLOBAL-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP26:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP26]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP27]], i32 [[TMP25]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP28]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP30:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP29]], i32 0, i32 0, i64 30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP31:%.*]] = load i32, ptr addrspace(22) [[TMP30]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP32:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP33:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP34:%.*]] = inttoptr i64 [[TMP33]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP35:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP34]], i32 [[TMP32]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP36:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP35]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP37:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP36]], i32 0, i32 0, i64 31 -; POST-PROCESS-GLOBAL-NEXT: [[TMP38:%.*]] = load i32, ptr addrspace(22) [[TMP37]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP39:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP40:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP41:%.*]] = inttoptr i64 [[TMP40]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP42:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP41]], i32 [[TMP39]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP43:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP42]], i32 -30 -; 
POST-PROCESS-GLOBAL-NEXT: [[TMP44:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP43]], i32 0, i32 0, i64 32 -; POST-PROCESS-GLOBAL-NEXT: [[TMP45:%.*]] = load i32, ptr addrspace(22) [[TMP44]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP46:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP47:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP48:%.*]] = inttoptr i64 [[TMP47]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP49:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP48]], i32 [[TMP46]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP50:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP49]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP51:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP50]], i32 0, i32 0, i64 33 -; POST-PROCESS-GLOBAL-NEXT: [[TMP52:%.*]] = load i32, ptr addrspace(22) [[TMP51]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP53:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP54:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP55:%.*]] = inttoptr i64 [[TMP54]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP56:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP55]], i32 [[TMP53]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP57:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP56]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP58:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP57]], i32 0, i32 0, i64 34 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP1:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() +; POST-PROCESS-GLOBAL-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(22) +; POST-PROCESS-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr [[CSP]], align 4 
+; POST-PROCESS-GLOBAL-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], -108 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP4]], ptr [[CSP]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP5:%.*]] = load i32, ptr [[CSP]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], 0 +; POST-PROCESS-GLOBAL-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 10) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 11) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 12) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: 
[[TMP13:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 13) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 14) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 15) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 16) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 17) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 18) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 19) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP20:%.*]] = load i32, ptr 
addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 20) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 21) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 22) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 23) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 24) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 25) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 26) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr 
getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 27) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP28:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 28) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP29:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 29) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP30:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP31:%.*]] = add i32 [[TMP30]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP32:%.*]] = add i32 [[TMP31]], 120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP32]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP34:%.*]] = load i32, ptr addrspace(22) [[TMP33]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP35:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP36:%.*]] = add i32 [[TMP35]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP37:%.*]] = add i32 [[TMP36]], 124 +; POST-PROCESS-GLOBAL-NEXT: [[TMP38:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP37]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP39:%.*]] = load i32, ptr addrspace(22) [[TMP38]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP40:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP41:%.*]] = add i32 [[TMP40]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP42:%.*]] = add i32 [[TMP41]], 128 +; POST-PROCESS-GLOBAL-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP42]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP44:%.*]] = load i32, ptr 
addrspace(22) [[TMP43]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP45:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP46:%.*]] = add i32 [[TMP45]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP47:%.*]] = add i32 [[TMP46]], 132 +; POST-PROCESS-GLOBAL-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP47]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP49:%.*]] = load i32, ptr addrspace(22) [[TMP48]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP50:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP51:%.*]] = add i32 [[TMP50]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP52:%.*]] = add i32 [[TMP51]], 136 +; POST-PROCESS-GLOBAL-NEXT: [[TMP53:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP52]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP54:%.*]] = load i32, ptr addrspace(22) [[TMP53]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP55:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP56:%.*]] = add i32 [[TMP55]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP57:%.*]] = add i32 [[TMP56]], 140 +; POST-PROCESS-GLOBAL-NEXT: [[TMP58:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP57]] ; POST-PROCESS-GLOBAL-NEXT: [[TMP59:%.*]] = load i32, ptr addrspace(22) [[TMP58]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP60:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP61:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP62:%.*]] = inttoptr i64 [[TMP61]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP63:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP62]], i32 [[TMP60]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP63]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP65:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP64]], i32 0, i32 0, i64 35 -; POST-PROCESS-GLOBAL-NEXT: 
[[TMP66:%.*]] = load i32, ptr addrspace(22) [[TMP65]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP67:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP68:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP69:%.*]] = inttoptr i64 [[TMP68]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP70:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP69]], i32 [[TMP67]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP70]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP72:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP71]], i32 0, i32 0, i64 36 -; POST-PROCESS-GLOBAL-NEXT: [[TMP73:%.*]] = load i32, ptr addrspace(22) [[TMP72]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP74:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP75:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP76:%.*]] = inttoptr i64 [[TMP75]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP77:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP76]], i32 [[TMP74]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP78:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP77]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP79:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP78]], i32 0, i32 0, i64 37 -; POST-PROCESS-GLOBAL-NEXT: [[TMP80:%.*]] = load i32, ptr addrspace(22) [[TMP79]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP81:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP82:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP83:%.*]] = inttoptr i64 [[TMP82]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP84:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP83]], i32 [[TMP81]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP85:%.*]] = 
getelementptr i32, ptr addrspace(22) [[TMP84]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP86:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP85]], i32 0, i32 0, i64 38 -; POST-PROCESS-GLOBAL-NEXT: [[TMP87:%.*]] = load i32, ptr addrspace(22) [[TMP86]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP88:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP89:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP90:%.*]] = inttoptr i64 [[TMP89]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP91:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP90]], i32 [[TMP88]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP92:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP91]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP93:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP92]], i32 0, i32 0, i64 39 +; POST-PROCESS-GLOBAL-NEXT: [[TMP61:%.*]] = add i32 [[TMP60]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP62:%.*]] = add i32 [[TMP61]], 144 +; POST-PROCESS-GLOBAL-NEXT: [[TMP63:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP62]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP64:%.*]] = load i32, ptr addrspace(22) [[TMP63]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP65:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP66:%.*]] = add i32 [[TMP65]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP67:%.*]] = add i32 [[TMP66]], 148 +; POST-PROCESS-GLOBAL-NEXT: [[TMP68:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP67]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP69:%.*]] = load i32, ptr addrspace(22) [[TMP68]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP70:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP71:%.*]] = add i32 [[TMP70]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP72:%.*]] = add i32 [[TMP71]], 152 +; POST-PROCESS-GLOBAL-NEXT: 
[[TMP73:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP72]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP74:%.*]] = load i32, ptr addrspace(22) [[TMP73]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP75:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP76:%.*]] = add i32 [[TMP75]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP77:%.*]] = add i32 [[TMP76]], 156 +; POST-PROCESS-GLOBAL-NEXT: [[TMP78:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP77]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP79:%.*]] = load i32, ptr addrspace(22) [[TMP78]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP80:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP81:%.*]] = add i32 [[TMP80]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP82:%.*]] = add i32 [[TMP81]], 160 +; POST-PROCESS-GLOBAL-NEXT: [[TMP83:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP82]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP84:%.*]] = load i32, ptr addrspace(22) [[TMP83]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP85:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP86:%.*]] = add i32 [[TMP85]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP87:%.*]] = add i32 [[TMP86]], 164 +; POST-PROCESS-GLOBAL-NEXT: [[TMP88:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP87]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP89:%.*]] = load i32, ptr addrspace(22) [[TMP88]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP90:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP91:%.*]] = add i32 [[TMP90]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP92:%.*]] = add i32 [[TMP91]], 168 +; POST-PROCESS-GLOBAL-NEXT: [[TMP93:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP92]] ; POST-PROCESS-GLOBAL-NEXT: [[TMP94:%.*]] = load i32, ptr addrspace(22) [[TMP93]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP95:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: 
[[TMP96:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP97:%.*]] = inttoptr i64 [[TMP96]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP98:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP97]], i32 [[TMP95]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP99:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP98]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP100:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP99]], i32 0, i32 0, i64 40 -; POST-PROCESS-GLOBAL-NEXT: [[TMP101:%.*]] = load i32, ptr addrspace(22) [[TMP100]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP102:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP103:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP104:%.*]] = inttoptr i64 [[TMP103]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP105:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP104]], i32 [[TMP102]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP106:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP105]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP107:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP106]], i32 0, i32 0, i64 41 -; POST-PROCESS-GLOBAL-NEXT: [[TMP108:%.*]] = load i32, ptr addrspace(22) [[TMP107]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP109:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP110:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP111:%.*]] = inttoptr i64 [[TMP110]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP112:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP111]], i32 [[TMP109]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP113:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP112]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP114:%.*]] = getelementptr 
[[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP113]], i32 0, i32 0, i64 42 -; POST-PROCESS-GLOBAL-NEXT: [[TMP115:%.*]] = load i32, ptr addrspace(22) [[TMP114]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP116:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP117:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP118:%.*]] = inttoptr i64 [[TMP117]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP119:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP118]], i32 [[TMP116]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP120:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP119]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP121:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP120]], i32 0, i32 0, i64 43 -; POST-PROCESS-GLOBAL-NEXT: [[TMP122:%.*]] = load i32, ptr addrspace(22) [[TMP121]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP123:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP124:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP125:%.*]] = inttoptr i64 [[TMP124]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP126:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP125]], i32 [[TMP123]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP127:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP126]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP128:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP127]], i32 0, i32 0, i64 44 +; POST-PROCESS-GLOBAL-NEXT: [[TMP96:%.*]] = add i32 [[TMP95]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP97:%.*]] = add i32 [[TMP96]], 172 +; POST-PROCESS-GLOBAL-NEXT: [[TMP98:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP97]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP99:%.*]] = load i32, ptr addrspace(22) [[TMP98]], align 4 +; 
POST-PROCESS-GLOBAL-NEXT: [[TMP100:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP101:%.*]] = add i32 [[TMP100]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP102:%.*]] = add i32 [[TMP101]], 176 +; POST-PROCESS-GLOBAL-NEXT: [[TMP103:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP102]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP104:%.*]] = load i32, ptr addrspace(22) [[TMP103]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP105:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP106:%.*]] = add i32 [[TMP105]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP107:%.*]] = add i32 [[TMP106]], 180 +; POST-PROCESS-GLOBAL-NEXT: [[TMP108:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP107]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP109:%.*]] = load i32, ptr addrspace(22) [[TMP108]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP110:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP111:%.*]] = add i32 [[TMP110]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP112:%.*]] = add i32 [[TMP111]], 184 +; POST-PROCESS-GLOBAL-NEXT: [[TMP113:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP112]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP114:%.*]] = load i32, ptr addrspace(22) [[TMP113]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP115:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP116:%.*]] = add i32 [[TMP115]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP117:%.*]] = add i32 [[TMP116]], 188 +; POST-PROCESS-GLOBAL-NEXT: [[TMP118:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP117]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP119:%.*]] = load i32, ptr addrspace(22) [[TMP118]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP120:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP121:%.*]] = add i32 [[TMP120]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP122:%.*]] = add i32 [[TMP121]], 192 +; 
POST-PROCESS-GLOBAL-NEXT: [[TMP123:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP122]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP124:%.*]] = load i32, ptr addrspace(22) [[TMP123]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP125:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP126:%.*]] = add i32 [[TMP125]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP127:%.*]] = add i32 [[TMP126]], 196 +; POST-PROCESS-GLOBAL-NEXT: [[TMP128:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP127]] ; POST-PROCESS-GLOBAL-NEXT: [[TMP129:%.*]] = load i32, ptr addrspace(22) [[TMP128]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP130:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP131:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP132:%.*]] = inttoptr i64 [[TMP131]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP133:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP132]], i32 [[TMP130]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP134:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP133]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP135:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP134]], i32 0, i32 0, i64 45 -; POST-PROCESS-GLOBAL-NEXT: [[TMP136:%.*]] = load i32, ptr addrspace(22) [[TMP135]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP137:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP138:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP139:%.*]] = inttoptr i64 [[TMP138]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP140:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP139]], i32 [[TMP137]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP141:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP140]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP142:%.*]] = getelementptr 
[[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP141]], i32 0, i32 0, i64 46 -; POST-PROCESS-GLOBAL-NEXT: [[TMP143:%.*]] = load i32, ptr addrspace(22) [[TMP142]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP144:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP145:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP146:%.*]] = inttoptr i64 [[TMP145]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP147:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP146]], i32 [[TMP144]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP148:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP147]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP149:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP148]], i32 0, i32 0, i64 47 -; POST-PROCESS-GLOBAL-NEXT: [[TMP150:%.*]] = load i32, ptr addrspace(22) [[TMP149]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP151:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP152:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP153:%.*]] = inttoptr i64 [[TMP152]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP154:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP153]], i32 [[TMP151]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP155:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP154]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP156:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP155]], i32 0, i32 0, i64 48 -; POST-PROCESS-GLOBAL-NEXT: [[TMP157:%.*]] = load i32, ptr addrspace(22) [[TMP156]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP158:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP159:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP160:%.*]] = inttoptr i64 [[TMP159]] to 
ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP161:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP160]], i32 [[TMP158]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP162:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP161]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP163:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP162]], i32 0, i32 0, i64 49 +; POST-PROCESS-GLOBAL-NEXT: [[TMP131:%.*]] = add i32 [[TMP130]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP132:%.*]] = add i32 [[TMP131]], 200 +; POST-PROCESS-GLOBAL-NEXT: [[TMP133:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP132]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP134:%.*]] = load i32, ptr addrspace(22) [[TMP133]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP135:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP136:%.*]] = add i32 [[TMP135]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP137:%.*]] = add i32 [[TMP136]], 204 +; POST-PROCESS-GLOBAL-NEXT: [[TMP138:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP137]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP139:%.*]] = load i32, ptr addrspace(22) [[TMP138]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP140:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP141:%.*]] = add i32 [[TMP140]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP142:%.*]] = add i32 [[TMP141]], 208 +; POST-PROCESS-GLOBAL-NEXT: [[TMP143:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP142]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP144:%.*]] = load i32, ptr addrspace(22) [[TMP143]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP145:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP146:%.*]] = add i32 [[TMP145]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP147:%.*]] = add i32 [[TMP146]], 212 +; POST-PROCESS-GLOBAL-NEXT: [[TMP148:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP147]] +; POST-PROCESS-GLOBAL-NEXT: 
[[TMP149:%.*]] = load i32, ptr addrspace(22) [[TMP148]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP150:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP151:%.*]] = add i32 [[TMP150]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP152:%.*]] = add i32 [[TMP151]], 216 +; POST-PROCESS-GLOBAL-NEXT: [[TMP153:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP152]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP154:%.*]] = load i32, ptr addrspace(22) [[TMP153]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP155:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP156:%.*]] = add i32 [[TMP155]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP157:%.*]] = add i32 [[TMP156]], 220 +; POST-PROCESS-GLOBAL-NEXT: [[TMP158:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP157]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP159:%.*]] = load i32, ptr addrspace(22) [[TMP158]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP160:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP161:%.*]] = add i32 [[TMP160]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP162:%.*]] = add i32 [[TMP161]], 224 +; POST-PROCESS-GLOBAL-NEXT: [[TMP163:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP162]] ; POST-PROCESS-GLOBAL-NEXT: [[TMP164:%.*]] = load i32, ptr addrspace(22) [[TMP163]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP165:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP166:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP167:%.*]] = inttoptr i64 [[TMP166]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP168:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP167]], i32 [[TMP165]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP169:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP168]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP170:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr 
addrspace(22) [[TMP169]], i32 0, i32 0, i64 50 -; POST-PROCESS-GLOBAL-NEXT: [[TMP171:%.*]] = load i32, ptr addrspace(22) [[TMP170]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP172:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP173:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP174:%.*]] = inttoptr i64 [[TMP173]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP175:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP174]], i32 [[TMP172]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP176:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP175]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP177:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP176]], i32 0, i32 0, i64 51 -; POST-PROCESS-GLOBAL-NEXT: [[TMP178:%.*]] = load i32, ptr addrspace(22) [[TMP177]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP179:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP180:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP181:%.*]] = inttoptr i64 [[TMP180]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP182:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP181]], i32 [[TMP179]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP183:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP182]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP184:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP183]], i32 0, i32 0, i64 52 -; POST-PROCESS-GLOBAL-NEXT: [[TMP185:%.*]] = load i32, ptr addrspace(22) [[TMP184]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP186:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP187:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP188:%.*]] = inttoptr i64 [[TMP187]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP189:%.*]] = 
getelementptr i8, ptr addrspace(22) [[TMP188]], i32 [[TMP186]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP190:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP189]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP191:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP190]], i32 0, i32 0, i64 53 -; POST-PROCESS-GLOBAL-NEXT: [[TMP192:%.*]] = load i32, ptr addrspace(22) [[TMP191]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP193:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP194:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP195:%.*]] = inttoptr i64 [[TMP194]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP196:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP195]], i32 [[TMP193]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP197:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP196]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP198:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP197]], i32 0, i32 0, i64 54 -; POST-PROCESS-GLOBAL-NEXT: [[TMP199:%.*]] = load i32, ptr addrspace(22) [[TMP198]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP200:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP201:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP202:%.*]] = inttoptr i64 [[TMP201]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP203:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP202]], i32 [[TMP200]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP204:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP203]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP205:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP204]], i32 0, i32 0, i64 55 -; POST-PROCESS-GLOBAL-NEXT: [[TMP206:%.*]] = load i32, ptr addrspace(22) [[TMP205]], align 4 -; POST-PROCESS-GLOBAL-NEXT: 
[[TMP207:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP208:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP209:%.*]] = inttoptr i64 [[TMP208]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP210:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP209]], i32 [[TMP207]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP211:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP210]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP212:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP211]], i32 0, i32 0, i64 56 -; POST-PROCESS-GLOBAL-NEXT: [[TMP213:%.*]] = load i32, ptr addrspace(22) [[TMP212]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_EXTRACT1:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP1]], 0 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_EXTRACT1:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 ; POST-PROCESS-GLOBAL-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) -; POST-PROCESS-GLOBAL-NEXT: [[TMP214:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP215:%.*]] = add i32 [[TMP214]], -108 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP215]], ptr [[CSP]], align 4 -; POST-PROCESS-GLOBAL-NEXT: call void @continuation.complete() +; POST-PROCESS-GLOBAL-NEXT: ret void +; POST-PROCESS-GLOBAL: entryresume.0.split: ; POST-PROCESS-GLOBAL-NEXT: unreachable ; ; @@ -2784,6 +2869,8 @@ attributes #3 = { nounwind } ; POST-PROCESS-GLOBAL-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITTRAVERSALDATA]], align 8 ; POST-PROCESS-GLOBAL-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; POST-PROCESS-GLOBAL-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP2:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() +; POST-PROCESS-GLOBAL-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr addrspace(22) ; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_0_EXTRACT:%.*]] = extractvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 0, 0, 0 ; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 ; POST-PROCESS-GLOBAL-NEXT: store <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]], ptr [[DOTFCA_0_0_0_0_GEP]], align 4 @@ -2814,455 +2901,347 @@ attributes #3 = { nounwind } ; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 1 ; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_1_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 ; POST-PROCESS-GLOBAL-NEXT: store i32 [[DOTFCA_1_1_EXTRACT]], ptr [[DOTFCA_1_1_GEP]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; POST-PROCESS-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 10) to ptr 
addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 11) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 12) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 13) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 14) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 15) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 16) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 17) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP14:%.*]] = load 
i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 18) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 19) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 20) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 21) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 22) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 23) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 24) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 25) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 26) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 27) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 28) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 29) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP27:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP28:%.*]] = inttoptr i64 [[TMP27]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP28]], i32 [[TMP26]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP30:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP29]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP31:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(22) [[TMP30]], i32 0, i32 0, i64 30 +; POST-PROCESS-GLOBAL-NEXT: [[TMP4:%.*]] = getelementptr inbounds 
[[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; POST-PROCESS-GLOBAL-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 10) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 11) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 12) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 13) to ptr 
addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 14) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 15) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 16) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 17) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 18) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 19) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 20) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP19:%.*]] = load 
i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 21) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 22) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 23) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 24) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 25) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 26) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 27) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 28) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 29) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP28:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP29:%.*]] = add i32 [[TMP28]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP30:%.*]] = add i32 [[TMP29]], 120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP31:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP30]] ; POST-PROCESS-GLOBAL-NEXT: [[TMP32:%.*]] = load i32, ptr addrspace(22) [[TMP31]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP33:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP34:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP35:%.*]] = inttoptr i64 [[TMP34]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP36:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP35]], i32 [[TMP33]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP37:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP36]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP38:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(22) [[TMP37]], i32 0, i32 0, i64 31 -; POST-PROCESS-GLOBAL-NEXT: [[TMP39:%.*]] = load i32, ptr addrspace(22) [[TMP38]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP40:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP41:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP42:%.*]] = inttoptr i64 [[TMP41]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP42]], i32 [[TMP40]] 
-; POST-PROCESS-GLOBAL-NEXT: [[TMP44:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP43]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP45:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(22) [[TMP44]], i32 0, i32 0, i64 32 -; POST-PROCESS-GLOBAL-NEXT: [[TMP46:%.*]] = load i32, ptr addrspace(22) [[TMP45]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP47:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP48:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP49:%.*]] = inttoptr i64 [[TMP48]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP49]], i32 [[TMP47]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP51:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP50]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP52:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(22) [[TMP51]], i32 0, i32 0, i64 33 -; POST-PROCESS-GLOBAL-NEXT: [[TMP53:%.*]] = load i32, ptr addrspace(22) [[TMP52]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP54:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP55:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP56:%.*]] = inttoptr i64 [[TMP55]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP57:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP56]], i32 [[TMP54]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP58:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP57]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP59:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(22) [[TMP58]], i32 0, i32 0, i64 34 -; POST-PROCESS-GLOBAL-NEXT: [[TMP60:%.*]] = load i32, ptr addrspace(22) [[TMP59]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP61:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP62:%.*]] = call i64 
@_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP63:%.*]] = inttoptr i64 [[TMP62]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP64:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP63]], i32 [[TMP61]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP65:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP64]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP66:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(22) [[TMP65]], i32 0, i32 0, i64 35 +; POST-PROCESS-GLOBAL-NEXT: [[TMP34:%.*]] = add i32 [[TMP33]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP35:%.*]] = add i32 [[TMP34]], 124 +; POST-PROCESS-GLOBAL-NEXT: [[TMP36:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP35]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP37:%.*]] = load i32, ptr addrspace(22) [[TMP36]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP38:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP39:%.*]] = add i32 [[TMP38]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP40:%.*]] = add i32 [[TMP39]], 128 +; POST-PROCESS-GLOBAL-NEXT: [[TMP41:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP40]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP42:%.*]] = load i32, ptr addrspace(22) [[TMP41]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP43:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP44:%.*]] = add i32 [[TMP43]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP45:%.*]] = add i32 [[TMP44]], 132 +; POST-PROCESS-GLOBAL-NEXT: [[TMP46:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP45]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP47:%.*]] = load i32, ptr addrspace(22) [[TMP46]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP48:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP49:%.*]] = add i32 [[TMP48]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP50:%.*]] = add i32 [[TMP49]], 136 +; POST-PROCESS-GLOBAL-NEXT: [[TMP51:%.*]] = getelementptr i8, ptr 
addrspace(22) [[TMP3]], i32 [[TMP50]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP52:%.*]] = load i32, ptr addrspace(22) [[TMP51]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP53:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP54:%.*]] = add i32 [[TMP53]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP55:%.*]] = add i32 [[TMP54]], 140 +; POST-PROCESS-GLOBAL-NEXT: [[TMP56:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP55]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP57:%.*]] = load i32, ptr addrspace(22) [[TMP56]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP58:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP59:%.*]] = add i32 [[TMP58]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP60:%.*]] = add i32 [[TMP59]], 144 +; POST-PROCESS-GLOBAL-NEXT: [[TMP61:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP60]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP62:%.*]] = load i32, ptr addrspace(22) [[TMP61]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP63:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP64:%.*]] = add i32 [[TMP63]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP65:%.*]] = add i32 [[TMP64]], 148 +; POST-PROCESS-GLOBAL-NEXT: [[TMP66:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP65]] ; POST-PROCESS-GLOBAL-NEXT: [[TMP67:%.*]] = load i32, ptr addrspace(22) [[TMP66]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP68:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP69:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP70:%.*]] = inttoptr i64 [[TMP69]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP71:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP70]], i32 [[TMP68]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP72:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP71]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP73:%.*]] = getelementptr 
[[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(22) [[TMP72]], i32 0, i32 0, i64 36 -; POST-PROCESS-GLOBAL-NEXT: [[TMP74:%.*]] = load i32, ptr addrspace(22) [[TMP73]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP75:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP76:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP77:%.*]] = inttoptr i64 [[TMP76]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP78:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP77]], i32 [[TMP75]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP79:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP78]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP80:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(22) [[TMP79]], i32 0, i32 0, i64 37 -; POST-PROCESS-GLOBAL-NEXT: [[TMP81:%.*]] = load i32, ptr addrspace(22) [[TMP80]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP82:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP83:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP84:%.*]] = inttoptr i64 [[TMP83]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP85:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP84]], i32 [[TMP82]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP86:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP85]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP87:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(22) [[TMP86]], i32 0, i32 0, i64 38 -; POST-PROCESS-GLOBAL-NEXT: [[TMP88:%.*]] = load i32, ptr addrspace(22) [[TMP87]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP89:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP90:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP91:%.*]] = inttoptr i64 [[TMP90]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: 
[[TMP92:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP91]], i32 [[TMP89]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP93:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP92]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP94:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(22) [[TMP93]], i32 0, i32 0, i64 39 -; POST-PROCESS-GLOBAL-NEXT: [[TMP95:%.*]] = load i32, ptr addrspace(22) [[TMP94]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP96:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP97:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP98:%.*]] = inttoptr i64 [[TMP97]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP99:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP98]], i32 [[TMP96]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP100:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP99]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP101:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(22) [[TMP100]], i32 0, i32 0, i64 40 +; POST-PROCESS-GLOBAL-NEXT: [[TMP69:%.*]] = add i32 [[TMP68]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP70:%.*]] = add i32 [[TMP69]], 152 +; POST-PROCESS-GLOBAL-NEXT: [[TMP71:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP70]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP72:%.*]] = load i32, ptr addrspace(22) [[TMP71]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP73:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP74:%.*]] = add i32 [[TMP73]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP75:%.*]] = add i32 [[TMP74]], 156 +; POST-PROCESS-GLOBAL-NEXT: [[TMP76:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP75]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP77:%.*]] = load i32, ptr addrspace(22) [[TMP76]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP78:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP79:%.*]] = add i32 
[[TMP78]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP80:%.*]] = add i32 [[TMP79]], 160 +; POST-PROCESS-GLOBAL-NEXT: [[TMP81:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP80]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP82:%.*]] = load i32, ptr addrspace(22) [[TMP81]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP83:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP84:%.*]] = add i32 [[TMP83]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP85:%.*]] = add i32 [[TMP84]], 164 +; POST-PROCESS-GLOBAL-NEXT: [[TMP86:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP85]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP87:%.*]] = load i32, ptr addrspace(22) [[TMP86]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP88:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP89:%.*]] = add i32 [[TMP88]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP90:%.*]] = add i32 [[TMP89]], 168 +; POST-PROCESS-GLOBAL-NEXT: [[TMP91:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP90]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP92:%.*]] = load i32, ptr addrspace(22) [[TMP91]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP93:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP94:%.*]] = add i32 [[TMP93]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP95:%.*]] = add i32 [[TMP94]], 172 +; POST-PROCESS-GLOBAL-NEXT: [[TMP96:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP95]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP97:%.*]] = load i32, ptr addrspace(22) [[TMP96]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP98:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP99:%.*]] = add i32 [[TMP98]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP100:%.*]] = add i32 [[TMP99]], 176 +; POST-PROCESS-GLOBAL-NEXT: [[TMP101:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP100]] ; POST-PROCESS-GLOBAL-NEXT: [[TMP102:%.*]] = load i32, ptr addrspace(22) [[TMP101]], align 4 ; 
POST-PROCESS-GLOBAL-NEXT: [[TMP103:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP104:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP105:%.*]] = inttoptr i64 [[TMP104]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP106:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP105]], i32 [[TMP103]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP107:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP106]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP108:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(22) [[TMP107]], i32 0, i32 0, i64 41 -; POST-PROCESS-GLOBAL-NEXT: [[TMP109:%.*]] = load i32, ptr addrspace(22) [[TMP108]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP110:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP111:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP112:%.*]] = inttoptr i64 [[TMP111]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP113:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP112]], i32 [[TMP110]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP114:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP113]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP115:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(22) [[TMP114]], i32 0, i32 0, i64 42 -; POST-PROCESS-GLOBAL-NEXT: [[TMP116:%.*]] = load i32, ptr addrspace(22) [[TMP115]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP117:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP118:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP119:%.*]] = inttoptr i64 [[TMP118]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP120:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP119]], i32 [[TMP117]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP121:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP120]], 
i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP122:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(22) [[TMP121]], i32 0, i32 0, i64 43 -; POST-PROCESS-GLOBAL-NEXT: [[TMP123:%.*]] = load i32, ptr addrspace(22) [[TMP122]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP124:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP125:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP126:%.*]] = inttoptr i64 [[TMP125]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP127:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP126]], i32 [[TMP124]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP128:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP127]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP129:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(22) [[TMP128]], i32 0, i32 0, i64 44 -; POST-PROCESS-GLOBAL-NEXT: [[TMP130:%.*]] = load i32, ptr addrspace(22) [[TMP129]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP131:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP132:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP133:%.*]] = inttoptr i64 [[TMP132]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP134:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP133]], i32 [[TMP131]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP135:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP134]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP136:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(22) [[TMP135]], i32 0, i32 0, i64 45 +; POST-PROCESS-GLOBAL-NEXT: [[TMP104:%.*]] = add i32 [[TMP103]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP105:%.*]] = add i32 [[TMP104]], 180 +; POST-PROCESS-GLOBAL-NEXT: [[TMP106:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP105]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP107:%.*]] = load i32, 
ptr addrspace(22) [[TMP106]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP108:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP109:%.*]] = add i32 [[TMP108]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP110:%.*]] = add i32 [[TMP109]], 184 +; POST-PROCESS-GLOBAL-NEXT: [[TMP111:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP110]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP112:%.*]] = load i32, ptr addrspace(22) [[TMP111]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP113:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP114:%.*]] = add i32 [[TMP113]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP115:%.*]] = add i32 [[TMP114]], 188 +; POST-PROCESS-GLOBAL-NEXT: [[TMP116:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP115]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP117:%.*]] = load i32, ptr addrspace(22) [[TMP116]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP118:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP119:%.*]] = add i32 [[TMP118]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP120:%.*]] = add i32 [[TMP119]], 192 +; POST-PROCESS-GLOBAL-NEXT: [[TMP121:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP120]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP122:%.*]] = load i32, ptr addrspace(22) [[TMP121]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP123:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP124:%.*]] = add i32 [[TMP123]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP125:%.*]] = add i32 [[TMP124]], 196 +; POST-PROCESS-GLOBAL-NEXT: [[TMP126:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP125]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP127:%.*]] = load i32, ptr addrspace(22) [[TMP126]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP128:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP129:%.*]] = add i32 [[TMP128]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP130:%.*]] = add 
i32 [[TMP129]], 200 +; POST-PROCESS-GLOBAL-NEXT: [[TMP131:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP130]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP132:%.*]] = load i32, ptr addrspace(22) [[TMP131]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP133:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP134:%.*]] = add i32 [[TMP133]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP135:%.*]] = add i32 [[TMP134]], 204 +; POST-PROCESS-GLOBAL-NEXT: [[TMP136:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP135]] ; POST-PROCESS-GLOBAL-NEXT: [[TMP137:%.*]] = load i32, ptr addrspace(22) [[TMP136]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP138:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP139:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP140:%.*]] = inttoptr i64 [[TMP139]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP141:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP140]], i32 [[TMP138]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP142:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP141]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP143:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(22) [[TMP142]], i32 0, i32 0, i64 46 -; POST-PROCESS-GLOBAL-NEXT: [[TMP144:%.*]] = load i32, ptr addrspace(22) [[TMP143]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP145:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP146:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP147:%.*]] = inttoptr i64 [[TMP146]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP148:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP147]], i32 [[TMP145]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP149:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP148]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP150:%.*]] = getelementptr 
[[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(22) [[TMP149]], i32 0, i32 0, i64 47 -; POST-PROCESS-GLOBAL-NEXT: [[TMP151:%.*]] = load i32, ptr addrspace(22) [[TMP150]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP152:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP153:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP154:%.*]] = inttoptr i64 [[TMP153]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP155:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP154]], i32 [[TMP152]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP156:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP155]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP157:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(22) [[TMP156]], i32 0, i32 0, i64 48 -; POST-PROCESS-GLOBAL-NEXT: [[TMP158:%.*]] = load i32, ptr addrspace(22) [[TMP157]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP159:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP160:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP161:%.*]] = inttoptr i64 [[TMP160]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP162:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP161]], i32 [[TMP159]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP163:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP162]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP164:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(22) [[TMP163]], i32 0, i32 0, i64 49 -; POST-PROCESS-GLOBAL-NEXT: [[TMP165:%.*]] = load i32, ptr addrspace(22) [[TMP164]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP166:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP167:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP168:%.*]] = inttoptr i64 [[TMP167]] to ptr 
addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP169:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP168]], i32 [[TMP166]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP170:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP169]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP171:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(22) [[TMP170]], i32 0, i32 0, i64 50 -; POST-PROCESS-GLOBAL-NEXT: [[TMP172:%.*]] = load i32, ptr addrspace(22) [[TMP171]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP173:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP174:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP175:%.*]] = inttoptr i64 [[TMP174]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP176:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP175]], i32 [[TMP173]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP177:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP176]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP178:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(22) [[TMP177]], i32 0, i32 0, i64 51 -; POST-PROCESS-GLOBAL-NEXT: [[TMP179:%.*]] = load i32, ptr addrspace(22) [[TMP178]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP180:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP181:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP182:%.*]] = inttoptr i64 [[TMP181]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP183:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP182]], i32 [[TMP180]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP184:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP183]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP185:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(22) [[TMP184]], i32 0, i32 0, i64 52 -; POST-PROCESS-GLOBAL-NEXT: [[TMP186:%.*]] = load i32, ptr addrspace(22) [[TMP185]], 
align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP187:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP188:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP189:%.*]] = inttoptr i64 [[TMP188]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP190:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP189]], i32 [[TMP187]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP191:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP190]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP192:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(22) [[TMP191]], i32 0, i32 0, i64 53 -; POST-PROCESS-GLOBAL-NEXT: [[TMP193:%.*]] = load i32, ptr addrspace(22) [[TMP192]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP194:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP195:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP196:%.*]] = inttoptr i64 [[TMP195]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP197:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP196]], i32 [[TMP194]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP198:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP197]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP199:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(22) [[TMP198]], i32 0, i32 0, i64 54 -; POST-PROCESS-GLOBAL-NEXT: [[TMP200:%.*]] = load i32, ptr addrspace(22) [[TMP199]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP201:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP202:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP203:%.*]] = inttoptr i64 [[TMP202]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP204:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP203]], i32 [[TMP201]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP205:%.*]] = getelementptr i32, ptr addrspace(22) 
[[TMP204]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP206:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(22) [[TMP205]], i32 0, i32 0, i64 55 -; POST-PROCESS-GLOBAL-NEXT: [[TMP207:%.*]] = load i32, ptr addrspace(22) [[TMP206]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP208:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP209:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP210:%.*]] = inttoptr i64 [[TMP209]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP211:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP210]], i32 [[TMP208]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP212:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP211]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP213:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspace(22) [[TMP212]], i32 0, i32 0, i64 56 -; POST-PROCESS-GLOBAL-NEXT: [[TMP214:%.*]] = load i32, ptr addrspace(22) [[TMP213]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP215:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; POST-PROCESS-GLOBAL-NEXT: [[ADDR_I:%.*]] = getelementptr [[STRUCT_SYSTEMDATA:%.*]], ptr [[TMP215]], i32 0, i32 1 +; POST-PROCESS-GLOBAL-NEXT: [[TMP139:%.*]] = add i32 [[TMP138]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP140:%.*]] = add i32 [[TMP139]], 208 +; POST-PROCESS-GLOBAL-NEXT: [[TMP141:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP140]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP142:%.*]] = load i32, ptr addrspace(22) [[TMP141]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP143:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP144:%.*]] = add i32 [[TMP143]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP145:%.*]] = add i32 [[TMP144]], 212 +; POST-PROCESS-GLOBAL-NEXT: [[TMP146:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP145]] +; 
POST-PROCESS-GLOBAL-NEXT: [[TMP147:%.*]] = load i32, ptr addrspace(22) [[TMP146]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP148:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP149:%.*]] = add i32 [[TMP148]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP150:%.*]] = add i32 [[TMP149]], 216 +; POST-PROCESS-GLOBAL-NEXT: [[TMP151:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP150]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP152:%.*]] = load i32, ptr addrspace(22) [[TMP151]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP153:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP154:%.*]] = add i32 [[TMP153]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP155:%.*]] = add i32 [[TMP154]], 220 +; POST-PROCESS-GLOBAL-NEXT: [[TMP156:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP155]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP157:%.*]] = load i32, ptr addrspace(22) [[TMP156]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP158:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP159:%.*]] = add i32 [[TMP158]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP160:%.*]] = add i32 [[TMP159]], 224 +; POST-PROCESS-GLOBAL-NEXT: [[TMP161:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP160]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP162:%.*]] = load i32, ptr addrspace(22) [[TMP161]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP163:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; POST-PROCESS-GLOBAL-NEXT: [[ADDR_I:%.*]] = getelementptr [[STRUCT_SYSTEMDATA:%.*]], ptr [[TMP163]], i32 0, i32 1 ; POST-PROCESS-GLOBAL-NEXT: [[VAL_I_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[ADDR_I]], i32 0, i32 0 ; POST-PROCESS-GLOBAL-NEXT: [[VAL_I_FCA_0_LOAD:%.*]] = load <2 x float>, ptr [[VAL_I_FCA_0_GEP]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[VAL_I_FCA_0_INSERT:%.*]] = insertvalue 
[[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[VAL_I_FCA_0_LOAD]], 0 ; POST-PROCESS-GLOBAL-NEXT: [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL_I_FCA_0_INSERT]], 0 ; POST-PROCESS-GLOBAL-NEXT: [[DOTSROA_011_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 0 -; POST-PROCESS-GLOBAL-NEXT: [[TMP216:%.*]] = bitcast float [[DOTSROA_011_0_VEC_EXTRACT]] to i32 +; POST-PROCESS-GLOBAL-NEXT: [[TMP164:%.*]] = bitcast float [[DOTSROA_011_0_VEC_EXTRACT]] to i32 ; POST-PROCESS-GLOBAL-NEXT: [[DOTSROA_011_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 1 -; POST-PROCESS-GLOBAL-NEXT: [[TMP217:%.*]] = bitcast float [[DOTSROA_011_4_VEC_EXTRACT]] to i32 +; POST-PROCESS-GLOBAL-NEXT: [[TMP165:%.*]] = bitcast float [[DOTSROA_011_4_VEC_EXTRACT]] to i32 ; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP1]], 0 ; POST-PROCESS-GLOBAL-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; POST-PROCESS-GLOBAL-NEXT: call void @_cont_AcceptHit(ptr [[SYSTEM_DATA_ALLOCA]]) -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP3]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP4]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP5]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr 
addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP6]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 10) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP7]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 11) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP8]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 12) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP9]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 13) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP10]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 14) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP11]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 15) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP12]], ptr addrspace(20) addrspacecast (ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 16) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP13]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 17) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP14]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 18) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP15]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 19) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP16]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 20) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP17]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 21) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP18]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 22) to ptr addrspace(20)), align 4 -; 
POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP19]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 23) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP20]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 24) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP21]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 25) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP22]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 26) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP23]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 27) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP24]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 28) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP25]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr 
addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 29) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP5]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP6]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP7]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP8]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 10) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP9]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 11) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP10]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 12) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP11]], ptr addrspace(20) addrspacecast (ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 13) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP12]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 14) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP13]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 15) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP14]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 16) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP15]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 17) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP16]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 18) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP17]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 19) to ptr addrspace(20)), align 4 +; 
POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP18]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 20) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP19]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 21) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP20]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 22) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP21]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 23) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP22]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 24) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP23]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 25) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP24]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr 
addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 26) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP25]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 27) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP26]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 28) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP27]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 29) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP166:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP167:%.*]] = add i32 [[TMP166]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP168:%.*]] = add i32 [[TMP167]], 120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP169:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP168]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP32]], ptr addrspace(22) [[TMP169]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP170:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP171:%.*]] = add i32 [[TMP170]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP172:%.*]] = add i32 [[TMP171]], 124 +; POST-PROCESS-GLOBAL-NEXT: [[TMP173:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP172]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP37]], ptr addrspace(22) [[TMP173]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP174:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP175:%.*]] = add i32 
[[TMP174]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP176:%.*]] = add i32 [[TMP175]], 128 +; POST-PROCESS-GLOBAL-NEXT: [[TMP177:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP176]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP42]], ptr addrspace(22) [[TMP177]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP178:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP179:%.*]] = add i32 [[TMP178]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP180:%.*]] = add i32 [[TMP179]], 132 +; POST-PROCESS-GLOBAL-NEXT: [[TMP181:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP180]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP47]], ptr addrspace(22) [[TMP181]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP182:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP183:%.*]] = add i32 [[TMP182]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP184:%.*]] = add i32 [[TMP183]], 136 +; POST-PROCESS-GLOBAL-NEXT: [[TMP185:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP184]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP52]], ptr addrspace(22) [[TMP185]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP186:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP187:%.*]] = add i32 [[TMP186]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP188:%.*]] = add i32 [[TMP187]], 140 +; POST-PROCESS-GLOBAL-NEXT: [[TMP189:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP188]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP57]], ptr addrspace(22) [[TMP189]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP190:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP191:%.*]] = add i32 [[TMP190]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP192:%.*]] = add i32 [[TMP191]], 144 +; POST-PROCESS-GLOBAL-NEXT: [[TMP193:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP192]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP62]], ptr addrspace(22) [[TMP193]], align 
4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP194:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP195:%.*]] = add i32 [[TMP194]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP196:%.*]] = add i32 [[TMP195]], 148 +; POST-PROCESS-GLOBAL-NEXT: [[TMP197:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP196]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP67]], ptr addrspace(22) [[TMP197]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP198:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP199:%.*]] = add i32 [[TMP198]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP200:%.*]] = add i32 [[TMP199]], 152 +; POST-PROCESS-GLOBAL-NEXT: [[TMP201:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP200]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP72]], ptr addrspace(22) [[TMP201]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP202:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP203:%.*]] = add i32 [[TMP202]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP204:%.*]] = add i32 [[TMP203]], 156 +; POST-PROCESS-GLOBAL-NEXT: [[TMP205:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP204]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP77]], ptr addrspace(22) [[TMP205]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP206:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP207:%.*]] = add i32 [[TMP206]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP208:%.*]] = add i32 [[TMP207]], 160 +; POST-PROCESS-GLOBAL-NEXT: [[TMP209:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP208]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP82]], ptr addrspace(22) [[TMP209]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP210:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP211:%.*]] = add i32 [[TMP210]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP212:%.*]] = add i32 [[TMP211]], 164 +; POST-PROCESS-GLOBAL-NEXT: 
[[TMP213:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP212]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP87]], ptr addrspace(22) [[TMP213]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP214:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP215:%.*]] = add i32 [[TMP214]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP216:%.*]] = add i32 [[TMP215]], 168 +; POST-PROCESS-GLOBAL-NEXT: [[TMP217:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP216]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP92]], ptr addrspace(22) [[TMP217]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP218:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP219:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP220:%.*]] = inttoptr i64 [[TMP219]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP221:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP220]], i32 [[TMP218]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP222:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP221]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP223:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP222]], i32 0, i32 0, i64 30 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP32]], ptr addrspace(22) [[TMP223]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP224:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP225:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP226:%.*]] = inttoptr i64 [[TMP225]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP227:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP226]], i32 [[TMP224]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP228:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP227]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP229:%.*]] = getelementptr 
[[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP228]], i32 0, i32 0, i64 31 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP39]], ptr addrspace(22) [[TMP229]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP219:%.*]] = add i32 [[TMP218]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP220:%.*]] = add i32 [[TMP219]], 172 +; POST-PROCESS-GLOBAL-NEXT: [[TMP221:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP220]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP97]], ptr addrspace(22) [[TMP221]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP222:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP223:%.*]] = add i32 [[TMP222]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP224:%.*]] = add i32 [[TMP223]], 176 +; POST-PROCESS-GLOBAL-NEXT: [[TMP225:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP224]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP102]], ptr addrspace(22) [[TMP225]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP226:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP227:%.*]] = add i32 [[TMP226]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP228:%.*]] = add i32 [[TMP227]], 180 +; POST-PROCESS-GLOBAL-NEXT: [[TMP229:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP228]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP107]], ptr addrspace(22) [[TMP229]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP230:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP231:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP232:%.*]] = inttoptr i64 [[TMP231]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP233:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP232]], i32 [[TMP230]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP234:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP233]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP235:%.*]] = getelementptr 
[[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP234]], i32 0, i32 0, i64 32 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP46]], ptr addrspace(22) [[TMP235]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP236:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP237:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP238:%.*]] = inttoptr i64 [[TMP237]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP239:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP238]], i32 [[TMP236]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP240:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP239]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP241:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP240]], i32 0, i32 0, i64 33 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP53]], ptr addrspace(22) [[TMP241]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP231:%.*]] = add i32 [[TMP230]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP232:%.*]] = add i32 [[TMP231]], 184 +; POST-PROCESS-GLOBAL-NEXT: [[TMP233:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP232]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP112]], ptr addrspace(22) [[TMP233]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP234:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP235:%.*]] = add i32 [[TMP234]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP236:%.*]] = add i32 [[TMP235]], 188 +; POST-PROCESS-GLOBAL-NEXT: [[TMP237:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP236]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP117]], ptr addrspace(22) [[TMP237]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP238:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP239:%.*]] = add i32 [[TMP238]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP240:%.*]] = add i32 
[[TMP239]], 192 +; POST-PROCESS-GLOBAL-NEXT: [[TMP241:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP240]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP122]], ptr addrspace(22) [[TMP241]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP242:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP243:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP244:%.*]] = inttoptr i64 [[TMP243]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP245:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP244]], i32 [[TMP242]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP246:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP245]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP247:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP246]], i32 0, i32 0, i64 34 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP60]], ptr addrspace(22) [[TMP247]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP248:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP249:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP250:%.*]] = inttoptr i64 [[TMP249]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP251:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP250]], i32 [[TMP248]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP252:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP251]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP253:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP252]], i32 0, i32 0, i64 35 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP67]], ptr addrspace(22) [[TMP253]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP243:%.*]] = add i32 [[TMP242]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP244:%.*]] = add i32 [[TMP243]], 196 +; POST-PROCESS-GLOBAL-NEXT: [[TMP245:%.*]] = getelementptr i8, ptr addrspace(22) 
[[TMP3]], i32 [[TMP244]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP127]], ptr addrspace(22) [[TMP245]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP246:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP247:%.*]] = add i32 [[TMP246]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP248:%.*]] = add i32 [[TMP247]], 200 +; POST-PROCESS-GLOBAL-NEXT: [[TMP249:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP248]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP132]], ptr addrspace(22) [[TMP249]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP250:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP251:%.*]] = add i32 [[TMP250]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP252:%.*]] = add i32 [[TMP251]], 204 +; POST-PROCESS-GLOBAL-NEXT: [[TMP253:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP252]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP137]], ptr addrspace(22) [[TMP253]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP254:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP255:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP256:%.*]] = inttoptr i64 [[TMP255]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP257:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP256]], i32 [[TMP254]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP258:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP257]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP259:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP258]], i32 0, i32 0, i64 36 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP74]], ptr addrspace(22) [[TMP259]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP260:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP261:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP262:%.*]] = 
inttoptr i64 [[TMP261]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP263:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP262]], i32 [[TMP260]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP264:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP263]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP265:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP264]], i32 0, i32 0, i64 37 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP81]], ptr addrspace(22) [[TMP265]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP255:%.*]] = add i32 [[TMP254]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP256:%.*]] = add i32 [[TMP255]], 208 +; POST-PROCESS-GLOBAL-NEXT: [[TMP257:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP256]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP142]], ptr addrspace(22) [[TMP257]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP258:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP259:%.*]] = add i32 [[TMP258]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP260:%.*]] = add i32 [[TMP259]], 212 +; POST-PROCESS-GLOBAL-NEXT: [[TMP261:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP260]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP147]], ptr addrspace(22) [[TMP261]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP262:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP263:%.*]] = add i32 [[TMP262]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP264:%.*]] = add i32 [[TMP263]], 216 +; POST-PROCESS-GLOBAL-NEXT: [[TMP265:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP264]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP152]], ptr addrspace(22) [[TMP265]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP266:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP267:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP268:%.*]] = inttoptr i64 
[[TMP267]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP269:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP268]], i32 [[TMP266]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP270:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP269]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP271:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP270]], i32 0, i32 0, i64 38 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP88]], ptr addrspace(22) [[TMP271]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP272:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP273:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP274:%.*]] = inttoptr i64 [[TMP273]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP275:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP274]], i32 [[TMP272]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP276:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP275]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP277:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP276]], i32 0, i32 0, i64 39 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP95]], ptr addrspace(22) [[TMP277]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP278:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP279:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP280:%.*]] = inttoptr i64 [[TMP279]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP281:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP280]], i32 [[TMP278]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP282:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP281]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP283:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP282]], i32 0, i32 0, i64 
40 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP102]], ptr addrspace(22) [[TMP283]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP284:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP285:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP286:%.*]] = inttoptr i64 [[TMP285]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP287:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP286]], i32 [[TMP284]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP288:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP287]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP289:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP288]], i32 0, i32 0, i64 41 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP109]], ptr addrspace(22) [[TMP289]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP290:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP291:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP292:%.*]] = inttoptr i64 [[TMP291]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP293:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP292]], i32 [[TMP290]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP294:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP293]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP295:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP294]], i32 0, i32 0, i64 42 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP116]], ptr addrspace(22) [[TMP295]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP296:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP297:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP298:%.*]] = inttoptr i64 [[TMP297]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP299:%.*]] = getelementptr 
i8, ptr addrspace(22) [[TMP298]], i32 [[TMP296]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP300:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP299]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP301:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP300]], i32 0, i32 0, i64 43 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP123]], ptr addrspace(22) [[TMP301]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP302:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP303:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP304:%.*]] = inttoptr i64 [[TMP303]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP305:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP304]], i32 [[TMP302]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP306:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP305]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP307:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP306]], i32 0, i32 0, i64 44 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP130]], ptr addrspace(22) [[TMP307]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP308:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP309:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP310:%.*]] = inttoptr i64 [[TMP309]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP311:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP310]], i32 [[TMP308]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP312:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP311]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP313:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP312]], i32 0, i32 0, i64 45 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP137]], ptr addrspace(22) [[TMP313]], align 4 
-; POST-PROCESS-GLOBAL-NEXT: [[TMP314:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP315:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP316:%.*]] = inttoptr i64 [[TMP315]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP317:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP316]], i32 [[TMP314]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP318:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP317]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP319:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP318]], i32 0, i32 0, i64 46 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP144]], ptr addrspace(22) [[TMP319]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP320:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP321:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP322:%.*]] = inttoptr i64 [[TMP321]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP323:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP322]], i32 [[TMP320]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP324:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP323]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP325:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP324]], i32 0, i32 0, i64 47 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP151]], ptr addrspace(22) [[TMP325]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP326:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP327:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP328:%.*]] = inttoptr i64 [[TMP327]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP329:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP328]], i32 [[TMP326]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP330:%.*]] = 
getelementptr i32, ptr addrspace(22) [[TMP329]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP331:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP330]], i32 0, i32 0, i64 48 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP158]], ptr addrspace(22) [[TMP331]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP332:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP333:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP334:%.*]] = inttoptr i64 [[TMP333]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP335:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP334]], i32 [[TMP332]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP336:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP335]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP337:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP336]], i32 0, i32 0, i64 49 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP165]], ptr addrspace(22) [[TMP337]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP338:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP339:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP340:%.*]] = inttoptr i64 [[TMP339]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP341:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP340]], i32 [[TMP338]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP342:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP341]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP343:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP342]], i32 0, i32 0, i64 50 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP172]], ptr addrspace(22) [[TMP343]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP344:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 
-; POST-PROCESS-GLOBAL-NEXT: [[TMP345:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP346:%.*]] = inttoptr i64 [[TMP345]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP347:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP346]], i32 [[TMP344]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP348:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP347]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP349:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP348]], i32 0, i32 0, i64 51 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP179]], ptr addrspace(22) [[TMP349]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP350:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP351:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP352:%.*]] = inttoptr i64 [[TMP351]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP353:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP352]], i32 [[TMP350]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP354:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP353]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP355:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP354]], i32 0, i32 0, i64 52 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP186]], ptr addrspace(22) [[TMP355]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP356:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP357:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP358:%.*]] = inttoptr i64 [[TMP357]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP359:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP358]], i32 [[TMP356]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP360:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP359]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: 
[[TMP361:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP360]], i32 0, i32 0, i64 53 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP193]], ptr addrspace(22) [[TMP361]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP362:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP363:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP364:%.*]] = inttoptr i64 [[TMP363]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP365:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP364]], i32 [[TMP362]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP366:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP365]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP367:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP366]], i32 0, i32 0, i64 54 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP200]], ptr addrspace(22) [[TMP367]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP368:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP369:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP370:%.*]] = inttoptr i64 [[TMP369]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP371:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP370]], i32 [[TMP368]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP372:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP371]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP373:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP372]], i32 0, i32 0, i64 55 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP207]], ptr addrspace(22) [[TMP373]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP374:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP375:%.*]] = call i64 
@_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP376:%.*]] = inttoptr i64 [[TMP375]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP377:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP376]], i32 [[TMP374]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP378:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP377]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP379:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP378]], i32 0, i32 0, i64 56 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP214]], ptr addrspace(22) [[TMP379]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP267:%.*]] = add i32 [[TMP266]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP268:%.*]] = add i32 [[TMP267]], 220 +; POST-PROCESS-GLOBAL-NEXT: [[TMP269:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP268]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP157]], ptr addrspace(22) [[TMP269]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP270:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP271:%.*]] = add i32 [[TMP270]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP272:%.*]] = add i32 [[TMP271]], 224 +; POST-PROCESS-GLOBAL-NEXT: [[TMP273:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP272]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP162]], ptr addrspace(22) [[TMP273]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 -; POST-PROCESS-GLOBAL-NEXT: [[TMP380:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT]] to i32 -; POST-PROCESS-GLOBAL-NEXT: [[TMP381:%.*]] = bitcast i32 [[TMP380]] to float -; POST-PROCESS-GLOBAL-NEXT: [[DOTSROA_012_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP381]], i32 0 +; POST-PROCESS-GLOBAL-NEXT: [[TMP274:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT]] to i32 +; POST-PROCESS-GLOBAL-NEXT: [[TMP275:%.*]] = bitcast i32 
[[TMP274]] to float +; POST-PROCESS-GLOBAL-NEXT: [[DOTSROA_012_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP275]], i32 0 ; POST-PROCESS-GLOBAL-NEXT: [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 -; POST-PROCESS-GLOBAL-NEXT: [[TMP382:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT]] to i32 -; POST-PROCESS-GLOBAL-NEXT: [[TMP383:%.*]] = bitcast i32 [[TMP382]] to float -; POST-PROCESS-GLOBAL-NEXT: [[DOTSROA_012_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_012_0_VEC_INSERT]], float [[TMP383]], i32 1 +; POST-PROCESS-GLOBAL-NEXT: [[TMP276:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT]] to i32 +; POST-PROCESS-GLOBAL-NEXT: [[TMP277:%.*]] = bitcast i32 [[TMP276]] to float +; POST-PROCESS-GLOBAL-NEXT: [[DOTSROA_012_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_012_0_VEC_INSERT]], float [[TMP277]], i32 1 ; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_012_4_VEC_INSERT]], 0 -; POST-PROCESS-GLOBAL-NEXT: [[TMP384:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; POST-PROCESS-GLOBAL-NEXT: call void @_cont_SetTriangleHitAttributes(ptr [[TMP384]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT]]) +; POST-PROCESS-GLOBAL-NEXT: [[TMP278:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; POST-PROCESS-GLOBAL-NEXT: call void @_cont_SetTriangleHitAttributes(ptr [[TMP278]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT]]) ; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_0_GEP1:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 ; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_0_LOAD:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP1]], align 4 ; 
POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD]], 0, 0, 0, 0 @@ -3293,8 +3272,8 @@ attributes #3 = { nounwind } ; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_1_1_GEP10:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 ; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_1_1_LOAD:%.*]] = load i32, ptr [[DOTFCA_1_1_GEP10]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], i32 [[DOTFCA_1_1_LOAD]], 1, 1 -; POST-PROCESS-GLOBAL-NEXT: [[TMP385:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-GLOBAL-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP385]], [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]]), !continuation.registercount [[META18]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP279:%.*]] = load i32, ptr [[CSP]], align 4 +; POST-PROCESS-GLOBAL-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR]], i32 [[TMP279]], [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]]), !continuation.registercount [[META18]] ; POST-PROCESS-GLOBAL-NEXT: unreachable ; ; @@ -3303,921 +3282,699 @@ attributes #3 = { nounwind } ; POST-PROCESS-GLOBAL-NEXT: AllocaSpillBB: ; POST-PROCESS-GLOBAL-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; POST-PROCESS-GLOBAL-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP2:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP1]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP4]], i64 108 -; POST-PROCESS-GLOBAL-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[CLOSESTHIT_FRAME:%.*]], ptr addrspace(22) [[TMP5]], i32 0, i32 0 -; POST-PROCESS-GLOBAL-NEXT: store i64 [[RETURNADDR]], ptr addrspace(22) [[RETURNADDR_SPILL_ADDR]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP1:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() +; POST-PROCESS-GLOBAL-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(22) +; POST-PROCESS-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr [[CSP]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 0 +; POST-PROCESS-GLOBAL-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], 108 +; POST-PROCESS-GLOBAL-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP5]] +; POST-PROCESS-GLOBAL-NEXT: store i64 [[RETURNADDR]], ptr addrspace(22) [[TMP6]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 0, 0 ; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 1, 0 -; POST-PROCESS-GLOBAL-NEXT: [[TMP6:%.*]] = load i32, 
ptr [[CSP]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], 108 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP7]], ptr [[CSP]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 10) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 11) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 12) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: 
[[TMP14:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 13) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 14) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 15) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 16) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 17) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 18) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) 
@REGISTERS to ptr), i32 0, i32 0, i64 19) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 20) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 21) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 22) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 23) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 24) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 25) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 26) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP28:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 27) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP29:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 28) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP30:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 29) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP31:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP32:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP33:%.*]] = inttoptr i64 [[TMP32]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP33]], i32 [[TMP31]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP35:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP34]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP36:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP35]], i32 0, i32 0, i64 30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP37:%.*]] = load i32, ptr addrspace(22) [[TMP36]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP38:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP39:%.*]] = 
call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP40:%.*]] = inttoptr i64 [[TMP39]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP41:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP40]], i32 [[TMP38]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP42:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP41]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP43:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP42]], i32 0, i32 0, i64 31 +; POST-PROCESS-GLOBAL-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 10) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 
0, i64 11) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 12) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 13) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 14) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 15) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 16) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 17) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 18) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 19) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 20) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 21) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 22) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 23) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 24) to ptr addrspace(20)), align 4 +; 
POST-PROCESS-GLOBAL-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 25) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 26) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 27) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP28:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 28) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP29:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 29) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP30:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP31:%.*]] = add i32 [[TMP30]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP32:%.*]] = add i32 [[TMP31]], 120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP32]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP34:%.*]] = load i32, ptr addrspace(22) [[TMP33]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP35:%.*]] = load i32, ptr addrspace(20) 
@REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP36:%.*]] = add i32 [[TMP35]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP37:%.*]] = add i32 [[TMP36]], 124 +; POST-PROCESS-GLOBAL-NEXT: [[TMP38:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP37]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP39:%.*]] = load i32, ptr addrspace(22) [[TMP38]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP40:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP41:%.*]] = add i32 [[TMP40]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP42:%.*]] = add i32 [[TMP41]], 128 +; POST-PROCESS-GLOBAL-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP42]] ; POST-PROCESS-GLOBAL-NEXT: [[TMP44:%.*]] = load i32, ptr addrspace(22) [[TMP43]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP45:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP46:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP47:%.*]] = inttoptr i64 [[TMP46]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP47]], i32 [[TMP45]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP49:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP48]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP50:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP49]], i32 0, i32 0, i64 32 -; POST-PROCESS-GLOBAL-NEXT: [[TMP51:%.*]] = load i32, ptr addrspace(22) [[TMP50]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP52:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP53:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP54:%.*]] = inttoptr i64 [[TMP53]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP55:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP54]], i32 [[TMP52]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP56:%.*]] = getelementptr i32, 
ptr addrspace(22) [[TMP55]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP57:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP56]], i32 0, i32 0, i64 33 -; POST-PROCESS-GLOBAL-NEXT: [[TMP58:%.*]] = load i32, ptr addrspace(22) [[TMP57]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP59:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP60:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP61:%.*]] = inttoptr i64 [[TMP60]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP62:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP61]], i32 [[TMP59]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP63:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP62]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP64:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP63]], i32 0, i32 0, i64 34 -; POST-PROCESS-GLOBAL-NEXT: [[TMP65:%.*]] = load i32, ptr addrspace(22) [[TMP64]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP66:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP67:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP68:%.*]] = inttoptr i64 [[TMP67]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP69:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP68]], i32 [[TMP66]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP69]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP71:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP70]], i32 0, i32 0, i64 35 -; POST-PROCESS-GLOBAL-NEXT: [[TMP72:%.*]] = load i32, ptr addrspace(22) [[TMP71]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP73:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP74:%.*]] = 
call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP75:%.*]] = inttoptr i64 [[TMP74]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP76:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP75]], i32 [[TMP73]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP77:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP76]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP78:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP77]], i32 0, i32 0, i64 36 +; POST-PROCESS-GLOBAL-NEXT: [[TMP46:%.*]] = add i32 [[TMP45]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP47:%.*]] = add i32 [[TMP46]], 132 +; POST-PROCESS-GLOBAL-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP47]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP49:%.*]] = load i32, ptr addrspace(22) [[TMP48]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP50:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP51:%.*]] = add i32 [[TMP50]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP52:%.*]] = add i32 [[TMP51]], 136 +; POST-PROCESS-GLOBAL-NEXT: [[TMP53:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP52]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP54:%.*]] = load i32, ptr addrspace(22) [[TMP53]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP55:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP56:%.*]] = add i32 [[TMP55]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP57:%.*]] = add i32 [[TMP56]], 140 +; POST-PROCESS-GLOBAL-NEXT: [[TMP58:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP57]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP59:%.*]] = load i32, ptr addrspace(22) [[TMP58]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP60:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP61:%.*]] = add i32 [[TMP60]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP62:%.*]] = add i32 [[TMP61]], 144 +; POST-PROCESS-GLOBAL-NEXT: [[TMP63:%.*]] 
= getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP62]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP64:%.*]] = load i32, ptr addrspace(22) [[TMP63]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP65:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP66:%.*]] = add i32 [[TMP65]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP67:%.*]] = add i32 [[TMP66]], 148 +; POST-PROCESS-GLOBAL-NEXT: [[TMP68:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP67]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP69:%.*]] = load i32, ptr addrspace(22) [[TMP68]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP70:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP71:%.*]] = add i32 [[TMP70]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP72:%.*]] = add i32 [[TMP71]], 152 +; POST-PROCESS-GLOBAL-NEXT: [[TMP73:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP72]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP74:%.*]] = load i32, ptr addrspace(22) [[TMP73]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP75:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP76:%.*]] = add i32 [[TMP75]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP77:%.*]] = add i32 [[TMP76]], 156 +; POST-PROCESS-GLOBAL-NEXT: [[TMP78:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP77]] ; POST-PROCESS-GLOBAL-NEXT: [[TMP79:%.*]] = load i32, ptr addrspace(22) [[TMP78]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP80:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP81:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP82:%.*]] = inttoptr i64 [[TMP81]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP83:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP82]], i32 [[TMP80]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP84:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP83]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP85:%.*]] = getelementptr 
[[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP84]], i32 0, i32 0, i64 37 -; POST-PROCESS-GLOBAL-NEXT: [[TMP86:%.*]] = load i32, ptr addrspace(22) [[TMP85]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP87:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP88:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP89:%.*]] = inttoptr i64 [[TMP88]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP90:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP89]], i32 [[TMP87]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP91:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP90]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP92:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP91]], i32 0, i32 0, i64 38 -; POST-PROCESS-GLOBAL-NEXT: [[TMP93:%.*]] = load i32, ptr addrspace(22) [[TMP92]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP94:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP95:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP96:%.*]] = inttoptr i64 [[TMP95]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP97:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP96]], i32 [[TMP94]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP98:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP97]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP99:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP98]], i32 0, i32 0, i64 39 -; POST-PROCESS-GLOBAL-NEXT: [[TMP100:%.*]] = load i32, ptr addrspace(22) [[TMP99]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP101:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP102:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP103:%.*]] 
= inttoptr i64 [[TMP102]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP104:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP103]], i32 [[TMP101]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP105:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP104]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP106:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP105]], i32 0, i32 0, i64 40 -; POST-PROCESS-GLOBAL-NEXT: [[TMP107:%.*]] = load i32, ptr addrspace(22) [[TMP106]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP108:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP109:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP110:%.*]] = inttoptr i64 [[TMP109]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP111:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP110]], i32 [[TMP108]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP112:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP111]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP113:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP112]], i32 0, i32 0, i64 41 +; POST-PROCESS-GLOBAL-NEXT: [[TMP81:%.*]] = add i32 [[TMP80]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP82:%.*]] = add i32 [[TMP81]], 160 +; POST-PROCESS-GLOBAL-NEXT: [[TMP83:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP82]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP84:%.*]] = load i32, ptr addrspace(22) [[TMP83]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP85:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP86:%.*]] = add i32 [[TMP85]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP87:%.*]] = add i32 [[TMP86]], 164 +; POST-PROCESS-GLOBAL-NEXT: [[TMP88:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP87]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP89:%.*]] = load i32, ptr addrspace(22) [[TMP88]], align 4 +; 
POST-PROCESS-GLOBAL-NEXT: [[TMP90:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP91:%.*]] = add i32 [[TMP90]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP92:%.*]] = add i32 [[TMP91]], 168 +; POST-PROCESS-GLOBAL-NEXT: [[TMP93:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP92]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP94:%.*]] = load i32, ptr addrspace(22) [[TMP93]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP95:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP96:%.*]] = add i32 [[TMP95]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP97:%.*]] = add i32 [[TMP96]], 172 +; POST-PROCESS-GLOBAL-NEXT: [[TMP98:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP97]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP99:%.*]] = load i32, ptr addrspace(22) [[TMP98]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP100:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP101:%.*]] = add i32 [[TMP100]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP102:%.*]] = add i32 [[TMP101]], 176 +; POST-PROCESS-GLOBAL-NEXT: [[TMP103:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP102]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP104:%.*]] = load i32, ptr addrspace(22) [[TMP103]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP105:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP106:%.*]] = add i32 [[TMP105]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP107:%.*]] = add i32 [[TMP106]], 180 +; POST-PROCESS-GLOBAL-NEXT: [[TMP108:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP107]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP109:%.*]] = load i32, ptr addrspace(22) [[TMP108]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP110:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP111:%.*]] = add i32 [[TMP110]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP112:%.*]] = add i32 [[TMP111]], 184 +; POST-PROCESS-GLOBAL-NEXT: 
[[TMP113:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP112]] ; POST-PROCESS-GLOBAL-NEXT: [[TMP114:%.*]] = load i32, ptr addrspace(22) [[TMP113]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP115:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP116:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP117:%.*]] = inttoptr i64 [[TMP116]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP118:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP117]], i32 [[TMP115]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP119:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP118]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP120:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP119]], i32 0, i32 0, i64 42 -; POST-PROCESS-GLOBAL-NEXT: [[TMP121:%.*]] = load i32, ptr addrspace(22) [[TMP120]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP122:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP123:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP124:%.*]] = inttoptr i64 [[TMP123]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP125:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP124]], i32 [[TMP122]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP126:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP125]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP127:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP126]], i32 0, i32 0, i64 43 -; POST-PROCESS-GLOBAL-NEXT: [[TMP128:%.*]] = load i32, ptr addrspace(22) [[TMP127]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP129:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP130:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP131:%.*]] = inttoptr i64 [[TMP130]] to 
ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP132:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP131]], i32 [[TMP129]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP133:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP132]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP134:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP133]], i32 0, i32 0, i64 44 -; POST-PROCESS-GLOBAL-NEXT: [[TMP135:%.*]] = load i32, ptr addrspace(22) [[TMP134]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP136:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP137:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP138:%.*]] = inttoptr i64 [[TMP137]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP139:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP138]], i32 [[TMP136]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP140:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP139]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP141:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP140]], i32 0, i32 0, i64 45 -; POST-PROCESS-GLOBAL-NEXT: [[TMP142:%.*]] = load i32, ptr addrspace(22) [[TMP141]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP143:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP144:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP145:%.*]] = inttoptr i64 [[TMP144]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP146:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP145]], i32 [[TMP143]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP147:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP146]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP148:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP147]], i32 0, i32 0, i64 46 +; 
POST-PROCESS-GLOBAL-NEXT: [[TMP116:%.*]] = add i32 [[TMP115]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP117:%.*]] = add i32 [[TMP116]], 188 +; POST-PROCESS-GLOBAL-NEXT: [[TMP118:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP117]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP119:%.*]] = load i32, ptr addrspace(22) [[TMP118]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP120:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP121:%.*]] = add i32 [[TMP120]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP122:%.*]] = add i32 [[TMP121]], 192 +; POST-PROCESS-GLOBAL-NEXT: [[TMP123:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP122]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP124:%.*]] = load i32, ptr addrspace(22) [[TMP123]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP125:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP126:%.*]] = add i32 [[TMP125]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP127:%.*]] = add i32 [[TMP126]], 196 +; POST-PROCESS-GLOBAL-NEXT: [[TMP128:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP127]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP129:%.*]] = load i32, ptr addrspace(22) [[TMP128]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP130:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP131:%.*]] = add i32 [[TMP130]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP132:%.*]] = add i32 [[TMP131]], 200 +; POST-PROCESS-GLOBAL-NEXT: [[TMP133:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP132]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP134:%.*]] = load i32, ptr addrspace(22) [[TMP133]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP135:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP136:%.*]] = add i32 [[TMP135]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP137:%.*]] = add i32 [[TMP136]], 204 +; POST-PROCESS-GLOBAL-NEXT: [[TMP138:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP137]] +; 
POST-PROCESS-GLOBAL-NEXT: [[TMP139:%.*]] = load i32, ptr addrspace(22) [[TMP138]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP140:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP141:%.*]] = add i32 [[TMP140]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP142:%.*]] = add i32 [[TMP141]], 208 +; POST-PROCESS-GLOBAL-NEXT: [[TMP143:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP142]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP144:%.*]] = load i32, ptr addrspace(22) [[TMP143]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP145:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP146:%.*]] = add i32 [[TMP145]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP147:%.*]] = add i32 [[TMP146]], 212 +; POST-PROCESS-GLOBAL-NEXT: [[TMP148:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP147]] ; POST-PROCESS-GLOBAL-NEXT: [[TMP149:%.*]] = load i32, ptr addrspace(22) [[TMP148]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP150:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP151:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP152:%.*]] = inttoptr i64 [[TMP151]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP153:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP152]], i32 [[TMP150]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP154:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP153]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP155:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP154]], i32 0, i32 0, i64 47 -; POST-PROCESS-GLOBAL-NEXT: [[TMP156:%.*]] = load i32, ptr addrspace(22) [[TMP155]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP157:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP158:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP159:%.*]] = inttoptr i64 
[[TMP158]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP160:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP159]], i32 [[TMP157]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP161:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP160]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP162:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP161]], i32 0, i32 0, i64 48 -; POST-PROCESS-GLOBAL-NEXT: [[TMP163:%.*]] = load i32, ptr addrspace(22) [[TMP162]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP164:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP165:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP166:%.*]] = inttoptr i64 [[TMP165]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP167:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP166]], i32 [[TMP164]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP168:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP167]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP169:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP168]], i32 0, i32 0, i64 49 -; POST-PROCESS-GLOBAL-NEXT: [[TMP170:%.*]] = load i32, ptr addrspace(22) [[TMP169]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP171:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP172:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP173:%.*]] = inttoptr i64 [[TMP172]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP174:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP173]], i32 [[TMP171]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP175:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP174]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP176:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP175]], i32 0, i32 0, i64 
50 -; POST-PROCESS-GLOBAL-NEXT: [[TMP177:%.*]] = load i32, ptr addrspace(22) [[TMP176]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP178:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP179:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP180:%.*]] = inttoptr i64 [[TMP179]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP181:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP180]], i32 [[TMP178]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP182:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP181]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP183:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP182]], i32 0, i32 0, i64 51 -; POST-PROCESS-GLOBAL-NEXT: [[TMP184:%.*]] = load i32, ptr addrspace(22) [[TMP183]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP185:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP186:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP187:%.*]] = inttoptr i64 [[TMP186]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP188:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP187]], i32 [[TMP185]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP189:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP188]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP190:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP189]], i32 0, i32 0, i64 52 -; POST-PROCESS-GLOBAL-NEXT: [[TMP191:%.*]] = load i32, ptr addrspace(22) [[TMP190]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP192:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP193:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP194:%.*]] = inttoptr i64 [[TMP193]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP195:%.*]] = 
getelementptr i8, ptr addrspace(22) [[TMP194]], i32 [[TMP192]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP196:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP195]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP197:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP196]], i32 0, i32 0, i64 53 -; POST-PROCESS-GLOBAL-NEXT: [[TMP198:%.*]] = load i32, ptr addrspace(22) [[TMP197]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP199:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP200:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP201:%.*]] = inttoptr i64 [[TMP200]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP202:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP201]], i32 [[TMP199]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP203:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP202]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP204:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP203]], i32 0, i32 0, i64 54 -; POST-PROCESS-GLOBAL-NEXT: [[TMP205:%.*]] = load i32, ptr addrspace(22) [[TMP204]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP206:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP207:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP208:%.*]] = inttoptr i64 [[TMP207]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP209:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP208]], i32 [[TMP206]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP210:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP209]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP211:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP210]], i32 0, i32 0, i64 55 -; POST-PROCESS-GLOBAL-NEXT: [[TMP212:%.*]] = load i32, ptr addrspace(22) 
[[TMP211]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP213:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP214:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP215:%.*]] = inttoptr i64 [[TMP214]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP216:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP215]], i32 [[TMP213]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP217:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP216]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP218:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspace(22) [[TMP217]], i32 0, i32 0, i64 56 -; POST-PROCESS-GLOBAL-NEXT: [[TMP219:%.*]] = load i32, ptr addrspace(22) [[TMP218]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP220:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[DOTSPILL_ADDR:%.*]] = getelementptr inbounds [[CLOSESTHIT_FRAME]], ptr addrspace(22) [[TMP5]], i32 0, i32 1 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP220]], ptr addrspace(22) [[DOTSPILL_ADDR]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP151:%.*]] = add i32 [[TMP150]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP152:%.*]] = add i32 [[TMP151]], 216 +; POST-PROCESS-GLOBAL-NEXT: [[TMP153:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP152]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP154:%.*]] = load i32, ptr addrspace(22) [[TMP153]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP155:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP156:%.*]] = add i32 [[TMP155]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP157:%.*]] = add i32 [[TMP156]], 220 +; POST-PROCESS-GLOBAL-NEXT: [[TMP158:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP157]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP159:%.*]] = load i32, ptr addrspace(22) [[TMP158]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP160:%.*]] = load i32, ptr addrspace(20) @REGISTERS, 
align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP161:%.*]] = add i32 [[TMP160]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP162:%.*]] = add i32 [[TMP161]], 224 +; POST-PROCESS-GLOBAL-NEXT: [[TMP163:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP162]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP164:%.*]] = load i32, ptr addrspace(22) [[TMP163]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP165:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP166:%.*]] = add i32 [[TMP4]], 116 +; POST-PROCESS-GLOBAL-NEXT: [[TMP167:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP166]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP165]], ptr addrspace(22) [[TMP167]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[VAL_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> [[DOTFCA_1_0_EXTRACT]], 0 ; POST-PROCESS-GLOBAL-NEXT: [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL_I_FCA_0_INSERT]], 0 ; POST-PROCESS-GLOBAL-NEXT: [[DOTSROA_053_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 0 -; POST-PROCESS-GLOBAL-NEXT: [[TMP221:%.*]] = bitcast float [[DOTSROA_053_0_VEC_EXTRACT]] to i32 +; POST-PROCESS-GLOBAL-NEXT: [[TMP168:%.*]] = bitcast float [[DOTSROA_053_0_VEC_EXTRACT]] to i32 ; POST-PROCESS-GLOBAL-NEXT: [[DOTSROA_053_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 1 -; POST-PROCESS-GLOBAL-NEXT: [[TMP222:%.*]] = bitcast float [[DOTSROA_053_4_VEC_EXTRACT]] to i32 +; POST-PROCESS-GLOBAL-NEXT: [[TMP169:%.*]] = bitcast float [[DOTSROA_053_4_VEC_EXTRACT]] to i32 ; POST-PROCESS-GLOBAL-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; POST-PROCESS-GLOBAL-NEXT: [[TMP223:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP224:%.*]] = load [[DX_TYPES_HANDLE]], ptr 
@"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP225:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP223]]) -; POST-PROCESS-GLOBAL-NEXT: [[TMP226:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP225]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) -; POST-PROCESS-GLOBAL-NEXT: [[TMP227:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP226]]) +; POST-PROCESS-GLOBAL-NEXT: [[TMP170:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP171:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP172:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP170]]) +; POST-PROCESS-GLOBAL-NEXT: [[TMP173:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP172]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; POST-PROCESS-GLOBAL-NEXT: [[TMP174:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP173]]) ; POST-PROCESS-GLOBAL-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison, <3 x i32> [[DOTFCA_0_0_EXTRACT]], 0 ; POST-PROCESS-GLOBAL-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]], 0 ; POST-PROCESS-GLOBAL-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 -; POST-PROCESS-GLOBAL-NEXT: [[TMP228:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP229:%.*]] = add i32 [[TMP228]], -108 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP229]], ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP8]], ptr 
addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP9]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP10]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP11]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 10) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP12]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 11) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP13]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 12) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP14]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 13) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP15]], ptr addrspace(20) addrspacecast (ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 14) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP16]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 15) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP17]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 16) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP18]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 17) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP19]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 18) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP20]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 19) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP21]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 20) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP22]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) 
@REGISTERS to ptr), i32 0, i32 0, i64 21) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP23]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 22) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP24]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 23) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP25]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 24) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP26]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 25) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP27]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 26) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP28]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 27) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP29]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 28) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: 
store i32 [[TMP30]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 29) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP230:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP231:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP232:%.*]] = inttoptr i64 [[TMP231]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP233:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP232]], i32 [[TMP230]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP234:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP233]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP235:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(22) [[TMP234]], i32 0, i32 0, i64 30 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP37]], ptr addrspace(22) [[TMP235]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP236:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP237:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP238:%.*]] = inttoptr i64 [[TMP237]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP239:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP238]], i32 [[TMP236]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP240:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP239]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP241:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(22) [[TMP240]], i32 0, i32 0, i64 31 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP44]], ptr addrspace(22) [[TMP241]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP242:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP243:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP244:%.*]] = inttoptr i64 
[[TMP243]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP245:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP244]], i32 [[TMP242]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP246:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP245]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP247:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(22) [[TMP246]], i32 0, i32 0, i64 32 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP51]], ptr addrspace(22) [[TMP247]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP248:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP249:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP250:%.*]] = inttoptr i64 [[TMP249]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP251:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP250]], i32 [[TMP248]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP252:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP251]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP253:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(22) [[TMP252]], i32 0, i32 0, i64 33 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP58]], ptr addrspace(22) [[TMP253]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP254:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP255:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP256:%.*]] = inttoptr i64 [[TMP255]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP257:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP256]], i32 [[TMP254]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP258:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP257]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP259:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(22) [[TMP258]], i32 0, i32 0, i64 34 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP65]], ptr addrspace(22) [[TMP259]], 
align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP260:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP261:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP262:%.*]] = inttoptr i64 [[TMP261]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP263:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP262]], i32 [[TMP260]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP264:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP263]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP265:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(22) [[TMP264]], i32 0, i32 0, i64 35 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP72]], ptr addrspace(22) [[TMP265]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP266:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP267:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP268:%.*]] = inttoptr i64 [[TMP267]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP269:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP268]], i32 [[TMP266]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP270:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP269]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP271:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(22) [[TMP270]], i32 0, i32 0, i64 36 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP79]], ptr addrspace(22) [[TMP271]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP272:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP273:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP274:%.*]] = inttoptr i64 [[TMP273]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP275:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP274]], i32 [[TMP272]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP276:%.*]] = getelementptr i32, ptr addrspace(22) 
[[TMP275]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP277:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(22) [[TMP276]], i32 0, i32 0, i64 37 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP86]], ptr addrspace(22) [[TMP277]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP278:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP279:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP280:%.*]] = inttoptr i64 [[TMP279]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP281:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP280]], i32 [[TMP278]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP282:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP281]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP283:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(22) [[TMP282]], i32 0, i32 0, i64 38 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP93]], ptr addrspace(22) [[TMP283]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP284:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP285:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP286:%.*]] = inttoptr i64 [[TMP285]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP287:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP286]], i32 [[TMP284]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP288:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP287]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP289:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(22) [[TMP288]], i32 0, i32 0, i64 39 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP100]], ptr addrspace(22) [[TMP289]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP290:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP291:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; 
POST-PROCESS-GLOBAL-NEXT: [[TMP292:%.*]] = inttoptr i64 [[TMP291]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP293:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP292]], i32 [[TMP290]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP294:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP293]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP295:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(22) [[TMP294]], i32 0, i32 0, i64 40 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP107]], ptr addrspace(22) [[TMP295]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP296:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP297:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP298:%.*]] = inttoptr i64 [[TMP297]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP299:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP298]], i32 [[TMP296]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP300:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP299]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP301:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(22) [[TMP300]], i32 0, i32 0, i64 41 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP114]], ptr addrspace(22) [[TMP301]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP302:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP303:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP304:%.*]] = inttoptr i64 [[TMP303]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP305:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP304]], i32 [[TMP302]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP306:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP305]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP307:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(22) [[TMP306]], i32 0, i32 0, i64 42 -; 
POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP121]], ptr addrspace(22) [[TMP307]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP308:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP309:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP310:%.*]] = inttoptr i64 [[TMP309]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP311:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP310]], i32 [[TMP308]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP312:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP311]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP313:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(22) [[TMP312]], i32 0, i32 0, i64 43 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP128]], ptr addrspace(22) [[TMP313]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP314:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP315:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP316:%.*]] = inttoptr i64 [[TMP315]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP317:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP316]], i32 [[TMP314]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP318:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP317]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP319:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(22) [[TMP318]], i32 0, i32 0, i64 44 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP135]], ptr addrspace(22) [[TMP319]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP320:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP321:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP322:%.*]] = inttoptr i64 [[TMP321]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP323:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP322]], i32 [[TMP320]] -; 
POST-PROCESS-GLOBAL-NEXT: [[TMP324:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP323]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP325:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(22) [[TMP324]], i32 0, i32 0, i64 45 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP142]], ptr addrspace(22) [[TMP325]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP326:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP327:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP328:%.*]] = inttoptr i64 [[TMP327]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP329:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP328]], i32 [[TMP326]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP330:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP329]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP331:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(22) [[TMP330]], i32 0, i32 0, i64 46 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP149]], ptr addrspace(22) [[TMP331]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP332:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP333:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP334:%.*]] = inttoptr i64 [[TMP333]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP335:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP334]], i32 [[TMP332]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP336:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP335]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP337:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(22) [[TMP336]], i32 0, i32 0, i64 47 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP156]], ptr addrspace(22) [[TMP337]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP338:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: 
[[TMP339:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP340:%.*]] = inttoptr i64 [[TMP339]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP341:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP340]], i32 [[TMP338]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP342:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP341]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP343:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(22) [[TMP342]], i32 0, i32 0, i64 48 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP163]], ptr addrspace(22) [[TMP343]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP344:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP345:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP346:%.*]] = inttoptr i64 [[TMP345]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP347:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP346]], i32 [[TMP344]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP348:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP347]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP349:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(22) [[TMP348]], i32 0, i32 0, i64 49 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP170]], ptr addrspace(22) [[TMP349]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP350:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP351:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP352:%.*]] = inttoptr i64 [[TMP351]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP353:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP352]], i32 [[TMP350]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP354:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP353]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP355:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], 
ptr addrspace(22) [[TMP354]], i32 0, i32 0, i64 50 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP177]], ptr addrspace(22) [[TMP355]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP356:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP357:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP358:%.*]] = inttoptr i64 [[TMP357]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP359:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP358]], i32 [[TMP356]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP360:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP359]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP361:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(22) [[TMP360]], i32 0, i32 0, i64 51 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP184]], ptr addrspace(22) [[TMP361]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP362:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP363:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP364:%.*]] = inttoptr i64 [[TMP363]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP365:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP364]], i32 [[TMP362]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP366:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP365]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP367:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(22) [[TMP366]], i32 0, i32 0, i64 52 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP191]], ptr addrspace(22) [[TMP367]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP368:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP369:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP370:%.*]] = inttoptr i64 [[TMP369]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP371:%.*]] = getelementptr i8, ptr 
addrspace(22) [[TMP370]], i32 [[TMP368]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP372:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP371]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP373:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(22) [[TMP372]], i32 0, i32 0, i64 53 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP198]], ptr addrspace(22) [[TMP373]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP374:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP375:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP376:%.*]] = inttoptr i64 [[TMP375]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP377:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP376]], i32 [[TMP374]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP378:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP377]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP379:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(22) [[TMP378]], i32 0, i32 0, i64 54 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP205]], ptr addrspace(22) [[TMP379]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP380:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP381:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP382:%.*]] = inttoptr i64 [[TMP381]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP383:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP382]], i32 [[TMP380]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP384:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP383]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP385:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(22) [[TMP384]], i32 0, i32 0, i64 55 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP212]], ptr addrspace(22) [[TMP385]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP386:%.*]] = load i32, ptr addrspace(20) @REGISTERS, 
align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP387:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP388:%.*]] = inttoptr i64 [[TMP387]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP389:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP388]], i32 [[TMP386]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP390:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP389]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP391:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(22) [[TMP390]], i32 0, i32 0, i64 56 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP219]], ptr addrspace(22) [[TMP391]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP392:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP393:%.*]] = add i32 [[TMP392]], 12 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP393]], ptr [[CSP]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP394:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP395:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @ClosestHit.resume.0 to i64)) -; POST-PROCESS-GLOBAL-NEXT: call void (i64, ...) 
@continuation.continue(i64 4, i32 [[TMP394]], i64 [[TMP395]], [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]]), !continuation.registercount [[META18]], !continuation.returnedRegistercount !18 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP4]], ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP7]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP8]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP9]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP10]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 10) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP11]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 11) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP12]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 12) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP13]], ptr addrspace(20) addrspacecast (ptr 
getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 13) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP14]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 14) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP15]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 15) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP16]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 16) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP17]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 17) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP18]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 18) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP19]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 19) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP20]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr 
addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 20) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP21]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 21) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP22]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 22) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP23]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 23) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP24]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 24) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP25]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 25) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP26]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 26) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP27]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 27) to ptr addrspace(20)), align 4 +; 
POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP28]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 28) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP29]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 29) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP175:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP176:%.*]] = add i32 [[TMP175]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP177:%.*]] = add i32 [[TMP176]], 120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP178:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP177]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP34]], ptr addrspace(22) [[TMP178]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP179:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP180:%.*]] = add i32 [[TMP179]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP181:%.*]] = add i32 [[TMP180]], 124 +; POST-PROCESS-GLOBAL-NEXT: [[TMP182:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP181]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP39]], ptr addrspace(22) [[TMP182]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP183:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP184:%.*]] = add i32 [[TMP183]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP185:%.*]] = add i32 [[TMP184]], 128 +; POST-PROCESS-GLOBAL-NEXT: [[TMP186:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP185]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP44]], ptr addrspace(22) [[TMP186]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP187:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP188:%.*]] = add i32 [[TMP187]], -120 +; 
POST-PROCESS-GLOBAL-NEXT: [[TMP189:%.*]] = add i32 [[TMP188]], 132 +; POST-PROCESS-GLOBAL-NEXT: [[TMP190:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP189]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP49]], ptr addrspace(22) [[TMP190]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP191:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP192:%.*]] = add i32 [[TMP191]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP193:%.*]] = add i32 [[TMP192]], 136 +; POST-PROCESS-GLOBAL-NEXT: [[TMP194:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP193]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP54]], ptr addrspace(22) [[TMP194]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP195:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP196:%.*]] = add i32 [[TMP195]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP197:%.*]] = add i32 [[TMP196]], 140 +; POST-PROCESS-GLOBAL-NEXT: [[TMP198:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP197]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP59]], ptr addrspace(22) [[TMP198]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP199:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP200:%.*]] = add i32 [[TMP199]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP201:%.*]] = add i32 [[TMP200]], 144 +; POST-PROCESS-GLOBAL-NEXT: [[TMP202:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP201]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP64]], ptr addrspace(22) [[TMP202]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP203:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP204:%.*]] = add i32 [[TMP203]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP205:%.*]] = add i32 [[TMP204]], 148 +; POST-PROCESS-GLOBAL-NEXT: [[TMP206:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP205]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP69]], ptr addrspace(22) [[TMP206]], align 4 +; 
POST-PROCESS-GLOBAL-NEXT: [[TMP207:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP208:%.*]] = add i32 [[TMP207]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP209:%.*]] = add i32 [[TMP208]], 152 +; POST-PROCESS-GLOBAL-NEXT: [[TMP210:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP209]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP74]], ptr addrspace(22) [[TMP210]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP211:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP212:%.*]] = add i32 [[TMP211]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP213:%.*]] = add i32 [[TMP212]], 156 +; POST-PROCESS-GLOBAL-NEXT: [[TMP214:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP213]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP79]], ptr addrspace(22) [[TMP214]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP215:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP216:%.*]] = add i32 [[TMP215]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP217:%.*]] = add i32 [[TMP216]], 160 +; POST-PROCESS-GLOBAL-NEXT: [[TMP218:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP217]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP84]], ptr addrspace(22) [[TMP218]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP219:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP220:%.*]] = add i32 [[TMP219]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP221:%.*]] = add i32 [[TMP220]], 164 +; POST-PROCESS-GLOBAL-NEXT: [[TMP222:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP221]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP89]], ptr addrspace(22) [[TMP222]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP223:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP224:%.*]] = add i32 [[TMP223]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP225:%.*]] = add i32 [[TMP224]], 168 +; POST-PROCESS-GLOBAL-NEXT: [[TMP226:%.*]] = 
getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP225]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP94]], ptr addrspace(22) [[TMP226]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP227:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP228:%.*]] = add i32 [[TMP227]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP229:%.*]] = add i32 [[TMP228]], 172 +; POST-PROCESS-GLOBAL-NEXT: [[TMP230:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP229]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP99]], ptr addrspace(22) [[TMP230]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP231:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP232:%.*]] = add i32 [[TMP231]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP233:%.*]] = add i32 [[TMP232]], 176 +; POST-PROCESS-GLOBAL-NEXT: [[TMP234:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP233]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP104]], ptr addrspace(22) [[TMP234]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP235:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP236:%.*]] = add i32 [[TMP235]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP237:%.*]] = add i32 [[TMP236]], 180 +; POST-PROCESS-GLOBAL-NEXT: [[TMP238:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP237]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP109]], ptr addrspace(22) [[TMP238]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP239:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP240:%.*]] = add i32 [[TMP239]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP241:%.*]] = add i32 [[TMP240]], 184 +; POST-PROCESS-GLOBAL-NEXT: [[TMP242:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP241]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP114]], ptr addrspace(22) [[TMP242]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP243:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: 
[[TMP244:%.*]] = add i32 [[TMP243]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP245:%.*]] = add i32 [[TMP244]], 188 +; POST-PROCESS-GLOBAL-NEXT: [[TMP246:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP245]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP119]], ptr addrspace(22) [[TMP246]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP247:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP248:%.*]] = add i32 [[TMP247]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP249:%.*]] = add i32 [[TMP248]], 192 +; POST-PROCESS-GLOBAL-NEXT: [[TMP250:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP249]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP124]], ptr addrspace(22) [[TMP250]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP251:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP252:%.*]] = add i32 [[TMP251]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP253:%.*]] = add i32 [[TMP252]], 196 +; POST-PROCESS-GLOBAL-NEXT: [[TMP254:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP253]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP129]], ptr addrspace(22) [[TMP254]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP255:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP256:%.*]] = add i32 [[TMP255]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP257:%.*]] = add i32 [[TMP256]], 200 +; POST-PROCESS-GLOBAL-NEXT: [[TMP258:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP257]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP134]], ptr addrspace(22) [[TMP258]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP259:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP260:%.*]] = add i32 [[TMP259]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP261:%.*]] = add i32 [[TMP260]], 204 +; POST-PROCESS-GLOBAL-NEXT: [[TMP262:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP261]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP139]], ptr 
addrspace(22) [[TMP262]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP263:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP264:%.*]] = add i32 [[TMP263]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP265:%.*]] = add i32 [[TMP264]], 208 +; POST-PROCESS-GLOBAL-NEXT: [[TMP266:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP265]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP144]], ptr addrspace(22) [[TMP266]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP267:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP268:%.*]] = add i32 [[TMP267]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP269:%.*]] = add i32 [[TMP268]], 212 +; POST-PROCESS-GLOBAL-NEXT: [[TMP270:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP269]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP149]], ptr addrspace(22) [[TMP270]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP271:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP272:%.*]] = add i32 [[TMP271]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP273:%.*]] = add i32 [[TMP272]], 216 +; POST-PROCESS-GLOBAL-NEXT: [[TMP274:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP273]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP154]], ptr addrspace(22) [[TMP274]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP275:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP276:%.*]] = add i32 [[TMP275]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP277:%.*]] = add i32 [[TMP276]], 220 +; POST-PROCESS-GLOBAL-NEXT: [[TMP278:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP277]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP159]], ptr addrspace(22) [[TMP278]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP279:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP280:%.*]] = add i32 [[TMP279]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP281:%.*]] = add i32 [[TMP280]], 224 +; 
POST-PROCESS-GLOBAL-NEXT: [[TMP282:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP281]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP164]], ptr addrspace(22) [[TMP282]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP283:%.*]] = load i32, ptr [[CSP]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP284:%.*]] = add i32 [[TMP283]], 120 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP284]], ptr [[CSP]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP285:%.*]] = load i32, ptr [[CSP]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP286:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @ClosestHit.resume.0 to i64)) +; POST-PROCESS-GLOBAL-NEXT: call void (i64, ...) @continuation.continue(i64 4, i32 [[TMP285]], i64 [[TMP286]], [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]]), !continuation.registercount [[META18]], !continuation.returnedRegistercount !18 ; POST-PROCESS-GLOBAL-NEXT: unreachable ; ; ; POST-PROCESS-GLOBAL-LABEL: define dso_local void @ClosestHit.resume.0( -; POST-PROCESS-GLOBAL-SAME: i32 [[TMP0:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP1:%.*]]) !lgc.rt.shaderstage [[META24]] !continuation.registercount [[META18]] !continuation [[META25]] { +; POST-PROCESS-GLOBAL-SAME: i32 [[CSPINIT:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META24]] !continuation.registercount [[META18]] !continuation [[META25]] { ; POST-PROCESS-GLOBAL-NEXT: entryresume.0: ; POST-PROCESS-GLOBAL-NEXT: [[CSP:%.*]] = alloca i32, align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP0]], ptr [[CSP]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], -12 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP3]], ptr [[CSP]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP4:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP5:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr 
addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP6]], i32 [[TMP4]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP7]], i64 0 -; POST-PROCESS-GLOBAL-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 10) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 11) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 12) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr 
getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 13) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 14) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 15) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 16) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 17) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 18) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 19) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 20) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 21) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 22) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 23) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 24) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 25) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP28:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 26) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP29:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 27) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP30:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 28) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP31:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 29) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP32:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP33:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP34:%.*]] = inttoptr i64 [[TMP33]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP35:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP34]], i32 [[TMP32]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP36:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP35]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP37:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP36]], i32 0, i32 0, i64 30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP38:%.*]] = load i32, ptr addrspace(22) [[TMP37]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP39:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP40:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP41:%.*]] = inttoptr i64 [[TMP40]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP42:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP41]], i32 [[TMP39]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP43:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP42]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: 
[[TMP44:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP43]], i32 0, i32 0, i64 31 -; POST-PROCESS-GLOBAL-NEXT: [[TMP45:%.*]] = load i32, ptr addrspace(22) [[TMP44]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP46:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP47:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP48:%.*]] = inttoptr i64 [[TMP47]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP49:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP48]], i32 [[TMP46]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP50:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP49]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP51:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP50]], i32 0, i32 0, i64 32 -; POST-PROCESS-GLOBAL-NEXT: [[TMP52:%.*]] = load i32, ptr addrspace(22) [[TMP51]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP53:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP54:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP55:%.*]] = inttoptr i64 [[TMP54]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP56:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP55]], i32 [[TMP53]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP57:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP56]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP58:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP57]], i32 0, i32 0, i64 33 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP1:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() +; POST-PROCESS-GLOBAL-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(22) +; POST-PROCESS-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr [[CSP]], align 4 +; 
POST-PROCESS-GLOBAL-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], -120 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP4]], ptr [[CSP]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP5:%.*]] = load i32, ptr [[CSP]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], 0 +; POST-PROCESS-GLOBAL-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 10) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 11) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 12) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: 
[[TMP13:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 13) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 14) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 15) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 16) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 17) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 18) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 19) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP20:%.*]] = load i32, ptr 
addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 20) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 21) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 22) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 23) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 24) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 25) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 26) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr 
getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 27) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP28:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 28) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP29:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 29) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP30:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP31:%.*]] = add i32 [[TMP30]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP32:%.*]] = add i32 [[TMP31]], 120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP32]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP34:%.*]] = load i32, ptr addrspace(22) [[TMP33]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP35:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP36:%.*]] = add i32 [[TMP35]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP37:%.*]] = add i32 [[TMP36]], 124 +; POST-PROCESS-GLOBAL-NEXT: [[TMP38:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP37]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP39:%.*]] = load i32, ptr addrspace(22) [[TMP38]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP40:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP41:%.*]] = add i32 [[TMP40]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP42:%.*]] = add i32 [[TMP41]], 128 +; POST-PROCESS-GLOBAL-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP42]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP44:%.*]] = load i32, ptr 
addrspace(22) [[TMP43]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP45:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP46:%.*]] = add i32 [[TMP45]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP47:%.*]] = add i32 [[TMP46]], 132 +; POST-PROCESS-GLOBAL-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP47]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP49:%.*]] = load i32, ptr addrspace(22) [[TMP48]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP50:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP51:%.*]] = add i32 [[TMP50]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP52:%.*]] = add i32 [[TMP51]], 136 +; POST-PROCESS-GLOBAL-NEXT: [[TMP53:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP52]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP54:%.*]] = load i32, ptr addrspace(22) [[TMP53]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP55:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP56:%.*]] = add i32 [[TMP55]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP57:%.*]] = add i32 [[TMP56]], 140 +; POST-PROCESS-GLOBAL-NEXT: [[TMP58:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP57]] ; POST-PROCESS-GLOBAL-NEXT: [[TMP59:%.*]] = load i32, ptr addrspace(22) [[TMP58]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP60:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP61:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP62:%.*]] = inttoptr i64 [[TMP61]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP63:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP62]], i32 [[TMP60]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP63]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP65:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP64]], i32 0, i32 0, i64 34 -; POST-PROCESS-GLOBAL-NEXT: 
[[TMP66:%.*]] = load i32, ptr addrspace(22) [[TMP65]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP67:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP68:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP69:%.*]] = inttoptr i64 [[TMP68]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP70:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP69]], i32 [[TMP67]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP70]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP72:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP71]], i32 0, i32 0, i64 35 -; POST-PROCESS-GLOBAL-NEXT: [[TMP73:%.*]] = load i32, ptr addrspace(22) [[TMP72]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP74:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP75:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP76:%.*]] = inttoptr i64 [[TMP75]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP77:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP76]], i32 [[TMP74]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP78:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP77]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP79:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP78]], i32 0, i32 0, i64 36 -; POST-PROCESS-GLOBAL-NEXT: [[TMP80:%.*]] = load i32, ptr addrspace(22) [[TMP79]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP81:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP82:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP83:%.*]] = inttoptr i64 [[TMP82]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP84:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP83]], i32 [[TMP81]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP85:%.*]] = 
getelementptr i32, ptr addrspace(22) [[TMP84]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP86:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP85]], i32 0, i32 0, i64 37 -; POST-PROCESS-GLOBAL-NEXT: [[TMP87:%.*]] = load i32, ptr addrspace(22) [[TMP86]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP88:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP89:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP90:%.*]] = inttoptr i64 [[TMP89]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP91:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP90]], i32 [[TMP88]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP92:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP91]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP93:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP92]], i32 0, i32 0, i64 38 +; POST-PROCESS-GLOBAL-NEXT: [[TMP61:%.*]] = add i32 [[TMP60]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP62:%.*]] = add i32 [[TMP61]], 144 +; POST-PROCESS-GLOBAL-NEXT: [[TMP63:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP62]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP64:%.*]] = load i32, ptr addrspace(22) [[TMP63]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP65:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP66:%.*]] = add i32 [[TMP65]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP67:%.*]] = add i32 [[TMP66]], 148 +; POST-PROCESS-GLOBAL-NEXT: [[TMP68:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP67]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP69:%.*]] = load i32, ptr addrspace(22) [[TMP68]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP70:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP71:%.*]] = add i32 [[TMP70]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP72:%.*]] = add i32 [[TMP71]], 152 +; POST-PROCESS-GLOBAL-NEXT: 
[[TMP73:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP72]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP74:%.*]] = load i32, ptr addrspace(22) [[TMP73]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP75:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP76:%.*]] = add i32 [[TMP75]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP77:%.*]] = add i32 [[TMP76]], 156 +; POST-PROCESS-GLOBAL-NEXT: [[TMP78:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP77]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP79:%.*]] = load i32, ptr addrspace(22) [[TMP78]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP80:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP81:%.*]] = add i32 [[TMP80]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP82:%.*]] = add i32 [[TMP81]], 160 +; POST-PROCESS-GLOBAL-NEXT: [[TMP83:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP82]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP84:%.*]] = load i32, ptr addrspace(22) [[TMP83]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP85:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP86:%.*]] = add i32 [[TMP85]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP87:%.*]] = add i32 [[TMP86]], 164 +; POST-PROCESS-GLOBAL-NEXT: [[TMP88:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP87]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP89:%.*]] = load i32, ptr addrspace(22) [[TMP88]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP90:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP91:%.*]] = add i32 [[TMP90]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP92:%.*]] = add i32 [[TMP91]], 168 +; POST-PROCESS-GLOBAL-NEXT: [[TMP93:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP92]] ; POST-PROCESS-GLOBAL-NEXT: [[TMP94:%.*]] = load i32, ptr addrspace(22) [[TMP93]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP95:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: 
[[TMP96:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP97:%.*]] = inttoptr i64 [[TMP96]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP98:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP97]], i32 [[TMP95]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP99:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP98]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP100:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP99]], i32 0, i32 0, i64 39 -; POST-PROCESS-GLOBAL-NEXT: [[TMP101:%.*]] = load i32, ptr addrspace(22) [[TMP100]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP102:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP103:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP104:%.*]] = inttoptr i64 [[TMP103]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP105:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP104]], i32 [[TMP102]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP106:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP105]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP107:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP106]], i32 0, i32 0, i64 40 -; POST-PROCESS-GLOBAL-NEXT: [[TMP108:%.*]] = load i32, ptr addrspace(22) [[TMP107]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP109:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP110:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP111:%.*]] = inttoptr i64 [[TMP110]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP112:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP111]], i32 [[TMP109]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP113:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP112]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP114:%.*]] = getelementptr 
[[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP113]], i32 0, i32 0, i64 41 -; POST-PROCESS-GLOBAL-NEXT: [[TMP115:%.*]] = load i32, ptr addrspace(22) [[TMP114]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP116:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP117:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP118:%.*]] = inttoptr i64 [[TMP117]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP119:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP118]], i32 [[TMP116]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP120:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP119]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP121:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP120]], i32 0, i32 0, i64 42 -; POST-PROCESS-GLOBAL-NEXT: [[TMP122:%.*]] = load i32, ptr addrspace(22) [[TMP121]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP123:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP124:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP125:%.*]] = inttoptr i64 [[TMP124]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP126:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP125]], i32 [[TMP123]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP127:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP126]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP128:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP127]], i32 0, i32 0, i64 43 +; POST-PROCESS-GLOBAL-NEXT: [[TMP96:%.*]] = add i32 [[TMP95]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP97:%.*]] = add i32 [[TMP96]], 172 +; POST-PROCESS-GLOBAL-NEXT: [[TMP98:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP97]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP99:%.*]] = load i32, ptr addrspace(22) [[TMP98]], align 4 +; 
POST-PROCESS-GLOBAL-NEXT: [[TMP100:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP101:%.*]] = add i32 [[TMP100]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP102:%.*]] = add i32 [[TMP101]], 176 +; POST-PROCESS-GLOBAL-NEXT: [[TMP103:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP102]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP104:%.*]] = load i32, ptr addrspace(22) [[TMP103]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP105:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP106:%.*]] = add i32 [[TMP105]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP107:%.*]] = add i32 [[TMP106]], 180 +; POST-PROCESS-GLOBAL-NEXT: [[TMP108:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP107]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP109:%.*]] = load i32, ptr addrspace(22) [[TMP108]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP110:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP111:%.*]] = add i32 [[TMP110]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP112:%.*]] = add i32 [[TMP111]], 184 +; POST-PROCESS-GLOBAL-NEXT: [[TMP113:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP112]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP114:%.*]] = load i32, ptr addrspace(22) [[TMP113]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP115:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP116:%.*]] = add i32 [[TMP115]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP117:%.*]] = add i32 [[TMP116]], 188 +; POST-PROCESS-GLOBAL-NEXT: [[TMP118:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP117]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP119:%.*]] = load i32, ptr addrspace(22) [[TMP118]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP120:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP121:%.*]] = add i32 [[TMP120]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP122:%.*]] = add i32 [[TMP121]], 192 +; 
POST-PROCESS-GLOBAL-NEXT: [[TMP123:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP122]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP124:%.*]] = load i32, ptr addrspace(22) [[TMP123]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP125:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP126:%.*]] = add i32 [[TMP125]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP127:%.*]] = add i32 [[TMP126]], 196 +; POST-PROCESS-GLOBAL-NEXT: [[TMP128:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP127]] ; POST-PROCESS-GLOBAL-NEXT: [[TMP129:%.*]] = load i32, ptr addrspace(22) [[TMP128]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP130:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP131:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP132:%.*]] = inttoptr i64 [[TMP131]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP133:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP132]], i32 [[TMP130]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP134:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP133]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP135:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP134]], i32 0, i32 0, i64 44 -; POST-PROCESS-GLOBAL-NEXT: [[TMP136:%.*]] = load i32, ptr addrspace(22) [[TMP135]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP137:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP138:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP139:%.*]] = inttoptr i64 [[TMP138]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP140:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP139]], i32 [[TMP137]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP141:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP140]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP142:%.*]] = getelementptr 
[[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP141]], i32 0, i32 0, i64 45 -; POST-PROCESS-GLOBAL-NEXT: [[TMP143:%.*]] = load i32, ptr addrspace(22) [[TMP142]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP144:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP145:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP146:%.*]] = inttoptr i64 [[TMP145]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP147:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP146]], i32 [[TMP144]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP148:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP147]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP149:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP148]], i32 0, i32 0, i64 46 -; POST-PROCESS-GLOBAL-NEXT: [[TMP150:%.*]] = load i32, ptr addrspace(22) [[TMP149]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP151:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP152:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP153:%.*]] = inttoptr i64 [[TMP152]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP154:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP153]], i32 [[TMP151]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP155:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP154]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP156:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP155]], i32 0, i32 0, i64 47 -; POST-PROCESS-GLOBAL-NEXT: [[TMP157:%.*]] = load i32, ptr addrspace(22) [[TMP156]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP158:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP159:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP160:%.*]] = inttoptr i64 [[TMP159]] to 
ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP161:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP160]], i32 [[TMP158]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP162:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP161]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP163:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP162]], i32 0, i32 0, i64 48 +; POST-PROCESS-GLOBAL-NEXT: [[TMP131:%.*]] = add i32 [[TMP130]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP132:%.*]] = add i32 [[TMP131]], 200 +; POST-PROCESS-GLOBAL-NEXT: [[TMP133:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP132]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP134:%.*]] = load i32, ptr addrspace(22) [[TMP133]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP135:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP136:%.*]] = add i32 [[TMP135]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP137:%.*]] = add i32 [[TMP136]], 204 +; POST-PROCESS-GLOBAL-NEXT: [[TMP138:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP137]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP139:%.*]] = load i32, ptr addrspace(22) [[TMP138]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP140:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP141:%.*]] = add i32 [[TMP140]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP142:%.*]] = add i32 [[TMP141]], 208 +; POST-PROCESS-GLOBAL-NEXT: [[TMP143:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP142]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP144:%.*]] = load i32, ptr addrspace(22) [[TMP143]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP145:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP146:%.*]] = add i32 [[TMP145]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP147:%.*]] = add i32 [[TMP146]], 212 +; POST-PROCESS-GLOBAL-NEXT: [[TMP148:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP147]] +; POST-PROCESS-GLOBAL-NEXT: 
[[TMP149:%.*]] = load i32, ptr addrspace(22) [[TMP148]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP150:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP151:%.*]] = add i32 [[TMP150]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP152:%.*]] = add i32 [[TMP151]], 216 +; POST-PROCESS-GLOBAL-NEXT: [[TMP153:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP152]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP154:%.*]] = load i32, ptr addrspace(22) [[TMP153]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP155:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP156:%.*]] = add i32 [[TMP155]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP157:%.*]] = add i32 [[TMP156]], 220 +; POST-PROCESS-GLOBAL-NEXT: [[TMP158:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP157]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP159:%.*]] = load i32, ptr addrspace(22) [[TMP158]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP160:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP161:%.*]] = add i32 [[TMP160]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP162:%.*]] = add i32 [[TMP161]], 224 +; POST-PROCESS-GLOBAL-NEXT: [[TMP163:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP162]] ; POST-PROCESS-GLOBAL-NEXT: [[TMP164:%.*]] = load i32, ptr addrspace(22) [[TMP163]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP165:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP166:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP167:%.*]] = inttoptr i64 [[TMP166]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP168:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP167]], i32 [[TMP165]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP169:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP168]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP170:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr 
addrspace(22) [[TMP169]], i32 0, i32 0, i64 49 -; POST-PROCESS-GLOBAL-NEXT: [[TMP171:%.*]] = load i32, ptr addrspace(22) [[TMP170]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP172:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP173:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP174:%.*]] = inttoptr i64 [[TMP173]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP175:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP174]], i32 [[TMP172]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP176:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP175]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP177:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP176]], i32 0, i32 0, i64 50 -; POST-PROCESS-GLOBAL-NEXT: [[TMP178:%.*]] = load i32, ptr addrspace(22) [[TMP177]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP179:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP180:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP181:%.*]] = inttoptr i64 [[TMP180]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP182:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP181]], i32 [[TMP179]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP183:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP182]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP184:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP183]], i32 0, i32 0, i64 51 -; POST-PROCESS-GLOBAL-NEXT: [[TMP185:%.*]] = load i32, ptr addrspace(22) [[TMP184]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP186:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP187:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP188:%.*]] = inttoptr i64 [[TMP187]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP189:%.*]] = 
getelementptr i8, ptr addrspace(22) [[TMP188]], i32 [[TMP186]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP190:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP189]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP191:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP190]], i32 0, i32 0, i64 52 -; POST-PROCESS-GLOBAL-NEXT: [[TMP192:%.*]] = load i32, ptr addrspace(22) [[TMP191]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP193:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP194:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP195:%.*]] = inttoptr i64 [[TMP194]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP196:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP195]], i32 [[TMP193]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP197:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP196]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP198:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP197]], i32 0, i32 0, i64 53 -; POST-PROCESS-GLOBAL-NEXT: [[TMP199:%.*]] = load i32, ptr addrspace(22) [[TMP198]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP200:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP201:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP202:%.*]] = inttoptr i64 [[TMP201]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP203:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP202]], i32 [[TMP200]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP204:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP203]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP205:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP204]], i32 0, i32 0, i64 54 -; POST-PROCESS-GLOBAL-NEXT: [[TMP206:%.*]] = load i32, ptr addrspace(22) [[TMP205]], align 4 -; POST-PROCESS-GLOBAL-NEXT: 
[[TMP207:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP208:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP209:%.*]] = inttoptr i64 [[TMP208]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP210:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP209]], i32 [[TMP207]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP211:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP210]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP212:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP211]], i32 0, i32 0, i64 55 -; POST-PROCESS-GLOBAL-NEXT: [[TMP213:%.*]] = load i32, ptr addrspace(22) [[TMP212]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP214:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP215:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP216:%.*]] = inttoptr i64 [[TMP215]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP217:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP216]], i32 [[TMP214]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP218:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP217]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP219:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP218]], i32 0, i32 0, i64 56 -; POST-PROCESS-GLOBAL-NEXT: [[TMP220:%.*]] = load i32, ptr addrspace(22) [[TMP219]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP1]], 0 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 ; POST-PROCESS-GLOBAL-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; POST-PROCESS-GLOBAL-NEXT: [[DOTRELOAD_ADDR:%.*]] = getelementptr inbounds [[CLOSESTHIT_FRAME:%.*]], ptr addrspace(22) [[TMP8]], i32 0, i32 1 -; POST-PROCESS-GLOBAL-NEXT: [[DOTRELOAD:%.*]] = load i32, ptr 
addrspace(22) [[DOTRELOAD_ADDR]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[RETURNADDR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[CLOSESTHIT_FRAME]], ptr addrspace(22) [[TMP8]], i32 0, i32 0 -; POST-PROCESS-GLOBAL-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(22) [[RETURNADDR_RELOAD_ADDR]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP165:%.*]] = add i32 [[TMP6]], 116 +; POST-PROCESS-GLOBAL-NEXT: [[TMP166:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP165]] +; POST-PROCESS-GLOBAL-NEXT: [[DOTRELOAD:%.*]] = load i32, ptr addrspace(22) [[TMP166]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP167:%.*]] = add i32 [[TMP6]], 108 +; POST-PROCESS-GLOBAL-NEXT: [[TMP168:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP167]] +; POST-PROCESS-GLOBAL-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(22) [[TMP168]], align 4 ; POST-PROCESS-GLOBAL-NEXT: store i32 [[DOTRELOAD]], ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP9]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP10]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP11]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP12]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to 
ptr), i32 0, i32 0, i64 10) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP13]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 11) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP14]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 12) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP15]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 13) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP16]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 14) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP17]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 15) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP18]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 16) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP19]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 17) to ptr addrspace(20)), align 4 -; 
POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP20]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 18) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP21]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 19) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP22]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 20) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP23]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 21) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP24]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 22) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP25]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 23) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP26]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 24) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP27]], ptr addrspace(20) 
addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 25) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP28]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 26) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP29]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 27) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP30]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 28) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP31]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 29) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP7]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP8]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP9]], ptr addrspace(20) addrspacecast (ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP10]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 10) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP11]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 11) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP12]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 12) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP13]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 13) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP14]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 14) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP15]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 15) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP16]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr 
addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 16) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP17]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 17) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP18]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 18) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP19]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 19) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP20]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 20) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP21]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 21) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP22]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 22) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP23]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 23) 
to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP24]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 24) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP25]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 25) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP26]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 26) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP27]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 27) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP28]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 28) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP29]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 29) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP169:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP170:%.*]] = add i32 [[TMP169]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP171:%.*]] = add i32 [[TMP170]], 120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP172:%.*]] = getelementptr i8, ptr addrspace(22) 
[[TMP2]], i32 [[TMP171]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP34]], ptr addrspace(22) [[TMP172]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP173:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP174:%.*]] = add i32 [[TMP173]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP175:%.*]] = add i32 [[TMP174]], 124 +; POST-PROCESS-GLOBAL-NEXT: [[TMP176:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP175]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP39]], ptr addrspace(22) [[TMP176]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP177:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP178:%.*]] = add i32 [[TMP177]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP179:%.*]] = add i32 [[TMP178]], 128 +; POST-PROCESS-GLOBAL-NEXT: [[TMP180:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP179]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP44]], ptr addrspace(22) [[TMP180]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP181:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP182:%.*]] = add i32 [[TMP181]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP183:%.*]] = add i32 [[TMP182]], 132 +; POST-PROCESS-GLOBAL-NEXT: [[TMP184:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP183]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP49]], ptr addrspace(22) [[TMP184]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP185:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP186:%.*]] = add i32 [[TMP185]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP187:%.*]] = add i32 [[TMP186]], 136 +; POST-PROCESS-GLOBAL-NEXT: [[TMP188:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP187]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP54]], ptr addrspace(22) [[TMP188]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP189:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP190:%.*]] = add i32 [[TMP189]], -120 
+; POST-PROCESS-GLOBAL-NEXT: [[TMP191:%.*]] = add i32 [[TMP190]], 140 +; POST-PROCESS-GLOBAL-NEXT: [[TMP192:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP191]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP59]], ptr addrspace(22) [[TMP192]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP193:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP194:%.*]] = add i32 [[TMP193]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP195:%.*]] = add i32 [[TMP194]], 144 +; POST-PROCESS-GLOBAL-NEXT: [[TMP196:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP195]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP64]], ptr addrspace(22) [[TMP196]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP197:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP198:%.*]] = add i32 [[TMP197]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP199:%.*]] = add i32 [[TMP198]], 148 +; POST-PROCESS-GLOBAL-NEXT: [[TMP200:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP199]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP69]], ptr addrspace(22) [[TMP200]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP201:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP202:%.*]] = add i32 [[TMP201]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP203:%.*]] = add i32 [[TMP202]], 152 +; POST-PROCESS-GLOBAL-NEXT: [[TMP204:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP203]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP74]], ptr addrspace(22) [[TMP204]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP205:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP206:%.*]] = add i32 [[TMP205]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP207:%.*]] = add i32 [[TMP206]], 156 +; POST-PROCESS-GLOBAL-NEXT: [[TMP208:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP207]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP79]], ptr addrspace(22) [[TMP208]], align 4 +; 
POST-PROCESS-GLOBAL-NEXT: [[TMP209:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP210:%.*]] = add i32 [[TMP209]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP211:%.*]] = add i32 [[TMP210]], 160 +; POST-PROCESS-GLOBAL-NEXT: [[TMP212:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP211]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP84]], ptr addrspace(22) [[TMP212]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP213:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP214:%.*]] = add i32 [[TMP213]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP215:%.*]] = add i32 [[TMP214]], 164 +; POST-PROCESS-GLOBAL-NEXT: [[TMP216:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP215]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP89]], ptr addrspace(22) [[TMP216]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP217:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP218:%.*]] = add i32 [[TMP217]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP219:%.*]] = add i32 [[TMP218]], 168 +; POST-PROCESS-GLOBAL-NEXT: [[TMP220:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP219]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP94]], ptr addrspace(22) [[TMP220]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP221:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP222:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP223:%.*]] = inttoptr i64 [[TMP222]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP224:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP223]], i32 [[TMP221]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP225:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP224]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP226:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP225]], i32 0, i32 0, i64 30 -; POST-PROCESS-GLOBAL-NEXT: store i32 
[[TMP38]], ptr addrspace(22) [[TMP226]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP227:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP228:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP229:%.*]] = inttoptr i64 [[TMP228]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP230:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP229]], i32 [[TMP227]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP231:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP230]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP232:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP231]], i32 0, i32 0, i64 31 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP45]], ptr addrspace(22) [[TMP232]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP222:%.*]] = add i32 [[TMP221]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP223:%.*]] = add i32 [[TMP222]], 172 +; POST-PROCESS-GLOBAL-NEXT: [[TMP224:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP223]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP99]], ptr addrspace(22) [[TMP224]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP225:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP226:%.*]] = add i32 [[TMP225]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP227:%.*]] = add i32 [[TMP226]], 176 +; POST-PROCESS-GLOBAL-NEXT: [[TMP228:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP227]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP104]], ptr addrspace(22) [[TMP228]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP229:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP230:%.*]] = add i32 [[TMP229]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP231:%.*]] = add i32 [[TMP230]], 180 +; POST-PROCESS-GLOBAL-NEXT: [[TMP232:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP231]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP109]], ptr addrspace(22) [[TMP232]], 
align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP233:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP234:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP235:%.*]] = inttoptr i64 [[TMP234]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP236:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP235]], i32 [[TMP233]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP237:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP236]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP238:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP237]], i32 0, i32 0, i64 32 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP52]], ptr addrspace(22) [[TMP238]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP239:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP240:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP241:%.*]] = inttoptr i64 [[TMP240]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP242:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP241]], i32 [[TMP239]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP243:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP242]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP244:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP243]], i32 0, i32 0, i64 33 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP59]], ptr addrspace(22) [[TMP244]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP234:%.*]] = add i32 [[TMP233]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP235:%.*]] = add i32 [[TMP234]], 184 +; POST-PROCESS-GLOBAL-NEXT: [[TMP236:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP235]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP114]], ptr addrspace(22) [[TMP236]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP237:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP238:%.*]] = 
add i32 [[TMP237]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP239:%.*]] = add i32 [[TMP238]], 188 +; POST-PROCESS-GLOBAL-NEXT: [[TMP240:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP239]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP119]], ptr addrspace(22) [[TMP240]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP241:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP242:%.*]] = add i32 [[TMP241]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP243:%.*]] = add i32 [[TMP242]], 192 +; POST-PROCESS-GLOBAL-NEXT: [[TMP244:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP243]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP124]], ptr addrspace(22) [[TMP244]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP245:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP246:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP247:%.*]] = inttoptr i64 [[TMP246]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP248:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP247]], i32 [[TMP245]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP249:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP248]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP250:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP249]], i32 0, i32 0, i64 34 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP66]], ptr addrspace(22) [[TMP250]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP251:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP252:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP253:%.*]] = inttoptr i64 [[TMP252]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP254:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP253]], i32 [[TMP251]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP255:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP254]], i32 -30 -; 
POST-PROCESS-GLOBAL-NEXT: [[TMP256:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP255]], i32 0, i32 0, i64 35 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP73]], ptr addrspace(22) [[TMP256]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP246:%.*]] = add i32 [[TMP245]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP247:%.*]] = add i32 [[TMP246]], 196 +; POST-PROCESS-GLOBAL-NEXT: [[TMP248:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP247]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP129]], ptr addrspace(22) [[TMP248]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP249:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP250:%.*]] = add i32 [[TMP249]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP251:%.*]] = add i32 [[TMP250]], 200 +; POST-PROCESS-GLOBAL-NEXT: [[TMP252:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP251]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP134]], ptr addrspace(22) [[TMP252]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP253:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP254:%.*]] = add i32 [[TMP253]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP255:%.*]] = add i32 [[TMP254]], 204 +; POST-PROCESS-GLOBAL-NEXT: [[TMP256:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP255]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP139]], ptr addrspace(22) [[TMP256]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP257:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP258:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP259:%.*]] = inttoptr i64 [[TMP258]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP260:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP259]], i32 [[TMP257]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP261:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP260]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP262:%.*]] = 
getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP261]], i32 0, i32 0, i64 36 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP80]], ptr addrspace(22) [[TMP262]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP263:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP264:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP265:%.*]] = inttoptr i64 [[TMP264]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP266:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP265]], i32 [[TMP263]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP267:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP266]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP268:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP267]], i32 0, i32 0, i64 37 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP87]], ptr addrspace(22) [[TMP268]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP258:%.*]] = add i32 [[TMP257]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP259:%.*]] = add i32 [[TMP258]], 208 +; POST-PROCESS-GLOBAL-NEXT: [[TMP260:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP259]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP144]], ptr addrspace(22) [[TMP260]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP261:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP262:%.*]] = add i32 [[TMP261]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP263:%.*]] = add i32 [[TMP262]], 212 +; POST-PROCESS-GLOBAL-NEXT: [[TMP264:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP263]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP149]], ptr addrspace(22) [[TMP264]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP265:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP266:%.*]] = add i32 [[TMP265]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP267:%.*]] = add i32 [[TMP266]], 216 +; 
POST-PROCESS-GLOBAL-NEXT: [[TMP268:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP267]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP154]], ptr addrspace(22) [[TMP268]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP269:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP270:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP271:%.*]] = inttoptr i64 [[TMP270]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP272:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP271]], i32 [[TMP269]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP273:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP272]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP274:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP273]], i32 0, i32 0, i64 38 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP94]], ptr addrspace(22) [[TMP274]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP275:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP276:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP277:%.*]] = inttoptr i64 [[TMP276]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP278:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP277]], i32 [[TMP275]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP279:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP278]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP280:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP279]], i32 0, i32 0, i64 39 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP101]], ptr addrspace(22) [[TMP280]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP281:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP282:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP283:%.*]] = inttoptr i64 [[TMP282]] to ptr addrspace(22) -; 
POST-PROCESS-GLOBAL-NEXT: [[TMP284:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP283]], i32 [[TMP281]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP285:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP284]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP286:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP285]], i32 0, i32 0, i64 40 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP108]], ptr addrspace(22) [[TMP286]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP287:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP288:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP289:%.*]] = inttoptr i64 [[TMP288]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP290:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP289]], i32 [[TMP287]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP291:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP290]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP292:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP291]], i32 0, i32 0, i64 41 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP115]], ptr addrspace(22) [[TMP292]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP293:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP294:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP295:%.*]] = inttoptr i64 [[TMP294]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP296:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP295]], i32 [[TMP293]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP297:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP296]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP298:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP297]], i32 0, i32 0, i64 42 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP122]], ptr addrspace(22) [[TMP298]], align 4 -; 
POST-PROCESS-GLOBAL-NEXT: [[TMP299:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP300:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP301:%.*]] = inttoptr i64 [[TMP300]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP302:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP301]], i32 [[TMP299]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP303:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP302]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP304:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP303]], i32 0, i32 0, i64 43 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP129]], ptr addrspace(22) [[TMP304]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP305:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP306:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP307:%.*]] = inttoptr i64 [[TMP306]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP308:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP307]], i32 [[TMP305]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP309:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP308]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP310:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP309]], i32 0, i32 0, i64 44 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP136]], ptr addrspace(22) [[TMP310]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP311:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP312:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP313:%.*]] = inttoptr i64 [[TMP312]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP314:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP313]], i32 [[TMP311]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP315:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP314]], 
i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP316:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP315]], i32 0, i32 0, i64 45 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP143]], ptr addrspace(22) [[TMP316]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP317:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP318:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP319:%.*]] = inttoptr i64 [[TMP318]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP320:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP319]], i32 [[TMP317]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP321:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP320]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP322:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP321]], i32 0, i32 0, i64 46 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP150]], ptr addrspace(22) [[TMP322]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP323:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP324:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP325:%.*]] = inttoptr i64 [[TMP324]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP326:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP325]], i32 [[TMP323]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP327:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP326]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP328:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP327]], i32 0, i32 0, i64 47 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP157]], ptr addrspace(22) [[TMP328]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP329:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP330:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; 
POST-PROCESS-GLOBAL-NEXT: [[TMP331:%.*]] = inttoptr i64 [[TMP330]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP332:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP331]], i32 [[TMP329]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP333:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP332]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP334:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP333]], i32 0, i32 0, i64 48 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP164]], ptr addrspace(22) [[TMP334]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP335:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP336:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP337:%.*]] = inttoptr i64 [[TMP336]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP338:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP337]], i32 [[TMP335]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP339:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP338]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP340:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP339]], i32 0, i32 0, i64 49 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP171]], ptr addrspace(22) [[TMP340]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP341:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP342:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP343:%.*]] = inttoptr i64 [[TMP342]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP344:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP343]], i32 [[TMP341]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP345:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP344]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP346:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP345]], i32 0, i32 0, i64 50 -; 
POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP178]], ptr addrspace(22) [[TMP346]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP347:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP348:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP349:%.*]] = inttoptr i64 [[TMP348]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP350:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP349]], i32 [[TMP347]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP351:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP350]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP352:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP351]], i32 0, i32 0, i64 51 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP185]], ptr addrspace(22) [[TMP352]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP353:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP354:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP355:%.*]] = inttoptr i64 [[TMP354]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP356:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP355]], i32 [[TMP353]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP357:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP356]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP358:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP357]], i32 0, i32 0, i64 52 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP192]], ptr addrspace(22) [[TMP358]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP359:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP360:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP361:%.*]] = inttoptr i64 [[TMP360]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP362:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP361]], i32 [[TMP359]] -; 
POST-PROCESS-GLOBAL-NEXT: [[TMP363:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP362]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP364:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP363]], i32 0, i32 0, i64 53 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP199]], ptr addrspace(22) [[TMP364]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP365:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP366:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP367:%.*]] = inttoptr i64 [[TMP366]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP368:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP367]], i32 [[TMP365]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP369:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP368]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP370:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP369]], i32 0, i32 0, i64 54 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP206]], ptr addrspace(22) [[TMP370]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP371:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP372:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP373:%.*]] = inttoptr i64 [[TMP372]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP374:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP373]], i32 [[TMP371]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP375:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP374]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP376:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP375]], i32 0, i32 0, i64 55 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP213]], ptr addrspace(22) [[TMP376]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP377:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; 
POST-PROCESS-GLOBAL-NEXT: [[TMP378:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() -; POST-PROCESS-GLOBAL-NEXT: [[TMP379:%.*]] = inttoptr i64 [[TMP378]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[TMP380:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP379]], i32 [[TMP377]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP381:%.*]] = getelementptr i32, ptr addrspace(22) [[TMP380]], i32 -30 -; POST-PROCESS-GLOBAL-NEXT: [[TMP382:%.*]] = getelementptr [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(22) [[TMP381]], i32 0, i32 0, i64 56 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP220]], ptr addrspace(22) [[TMP382]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP383:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP384:%.*]] = add i32 [[TMP383]], -108 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP384]], ptr [[CSP]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP270:%.*]] = add i32 [[TMP269]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP271:%.*]] = add i32 [[TMP270]], 220 +; POST-PROCESS-GLOBAL-NEXT: [[TMP272:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP271]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP159]], ptr addrspace(22) [[TMP272]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP273:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP274:%.*]] = add i32 [[TMP273]], -120 +; POST-PROCESS-GLOBAL-NEXT: [[TMP275:%.*]] = add i32 [[TMP274]], 224 +; POST-PROCESS-GLOBAL-NEXT: [[TMP276:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP275]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP164]], ptr addrspace(22) [[TMP276]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT]], 0 -; POST-PROCESS-GLOBAL-NEXT: [[TMP385:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-GLOBAL-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP385]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META18]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP277:%.*]] = load i32, ptr [[CSP]], align 4 +; POST-PROCESS-GLOBAL-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP277]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META18]] ; POST-PROCESS-GLOBAL-NEXT: unreachable ; diff --git a/shared/continuations/test/dx/register-buffer.ll b/llvmraytracing/test/dx/register-buffer.ll similarity index 90% rename from shared/continuations/test/dx/register-buffer.ll rename to llvmraytracing/test/dx/register-buffer.ll index 8b75660f34..5a73e2278a 100644 --- a/shared/continuations/test/dx/register-buffer.ll +++ b/llvmraytracing/test/dx/register-buffer.ll @@ -2,12 +2,12 @@ ; RUN: opt --verify-each -passes='register-buffer,lint,instsimplify' -S %s 2> %t.stderr | FileCheck %s ; RUN: count 0 < %t.stderr -target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" +target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" @GLOBAL = external global [20 x i32], !registerbuffer !1 @GLOBAL_NO_REGS = external global [20 x i32], !registerbuffer !2 -!1 = !{ i32 15, i32 21 } -!2 = !{ i32 0, i32 21 } +!1 = !{ i32 15, i32 32 } +!2 = !{ i32 0, i32 32 } %complex_type = type { %complex_type*, half, %complex_type addrspace(1)* } @@ -40,10 +40,10 @@ define i32 @load_i32_reg2() { define i32 @load_i32_mem() { ; CHECK-LABEL: define i32 @load_i32_mem() { -; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(21) 
@registerbuffer.getpointer.a15i32(ptr addrspace(20) @GLOBAL) -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP1]], i32 -15 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [20 x i32], ptr addrspace(21) [[TMP2]], i32 0, i32 15 -; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(21) [[TMP3]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @registerbuffer.getpointer.a15i32(ptr addrspace(20) @GLOBAL) +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr addrspace(32) [[TMP1]], i32 -15 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [20 x i32], ptr addrspace(32) [[TMP2]], i32 0, i32 15 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(32) [[TMP3]], align 4 ; CHECK-NEXT: ret i32 [[TMP4]] ; %addr = getelementptr [20 x i32], [20 x i32]* @GLOBAL, i32 0, i32 15 @@ -54,9 +54,9 @@ define i32 @load_i32_mem() { define i32 @load_i32_dyn(i32 %i) { ; CHECK-LABEL: define i32 @load_i32_dyn( ; CHECK-SAME: i32 [[I:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(21) @registerbuffer.getpointer.a15i32(ptr addrspace(20) @GLOBAL) -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP1]], i32 -15 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [20 x i32], ptr addrspace(21) [[TMP2]], i32 0, i32 [[I]] +; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @registerbuffer.getpointer.a15i32(ptr addrspace(20) @GLOBAL) +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr addrspace(32) [[TMP1]], i32 -15 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [20 x i32], ptr addrspace(32) [[TMP2]], i32 0, i32 [[I]] ; CHECK-NEXT: [[ADDR:%.*]] = getelementptr [20 x i32], ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i32 0, i32 [[I]] ; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(20) ; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr addrspace(20) [[TMP4]] to i32 @@ -67,7 +67,7 @@ define i32 @load_i32_dyn(i32 %i) { ; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) [[TMP4]], align 4 ; CHECK-NEXT: br label [[TMP12:%.*]] ; CHECK: 
10: -; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(21) [[TMP3]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(32) [[TMP3]], align 4 ; CHECK-NEXT: br label [[TMP12]] ; CHECK: 12: ; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ [[TMP9]], [[TMP8]] ], [ [[TMP11]], [[TMP10]] ] @@ -81,9 +81,9 @@ define i32 @load_i32_dyn(i32 %i) { define i32 @load_i32_dyn_no_regs(i32 %i) { ; CHECK-LABEL: define i32 @load_i32_dyn_no_regs( ; CHECK-SAME: i32 [[I:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(21) @registerbuffer.getpointer.a0i32(ptr addrspace(20) @GLOBAL_NO_REGS) -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr [20 x i32], ptr addrspace(21) [[TMP1]], i32 0, i32 [[I]] -; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(21) [[TMP2]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @registerbuffer.getpointer.a0i32(ptr addrspace(20) @GLOBAL_NO_REGS) +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr [20 x i32], ptr addrspace(32) [[TMP1]], i32 0, i32 [[I]] +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(32) [[TMP2]], align 4 ; CHECK-NEXT: ret i32 [[TMP3]] ; %addr = getelementptr [20 x i32], [20 x i32]* @GLOBAL_NO_REGS, i32 0, i32 %i @@ -111,13 +111,13 @@ define i64 @load_i64_reg() { define i64 @load_i64_mem() { ; CHECK-LABEL: define i64 @load_i64_mem() { ; CHECK-NEXT: [[VAL_FCA_ALLOCA:%.*]] = alloca <{ i32, i32 }>, align 8 -; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(21) @registerbuffer.getpointer.a15i32(ptr addrspace(20) @GLOBAL) -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP1]], i32 -15 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [20 x i32], ptr addrspace(21) [[TMP2]], i32 0, i32 14 +; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @registerbuffer.getpointer.a15i32(ptr addrspace(20) @GLOBAL) +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr addrspace(32) [[TMP1]], i32 -15 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [20 x i32], ptr addrspace(32) [[TMP2]], i32 0, i32 14 ; CHECK-NEXT: [[TMP4:%.*]] = load i32, 
ptr addrspace(20) addrspacecast (ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i64 56) to ptr addrspace(20)), align 4 ; CHECK-NEXT: [[VAL_FCA_INSERT_0:%.*]] = insertvalue <{ i32, i32 }> poison, i32 [[TMP4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr <{ i32, i32 }>, ptr addrspace(21) [[TMP3]], i32 0, i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(21) [[TMP5]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr <{ i32, i32 }>, ptr addrspace(32) [[TMP3]], i32 0, i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(32) [[TMP5]], align 4 ; CHECK-NEXT: [[VAL_FCA_INSERT_1:%.*]] = insertvalue <{ i32, i32 }> [[VAL_FCA_INSERT_0]], i32 [[TMP6]], 1 ; CHECK-NEXT: store <{ i32, i32 }> [[VAL_FCA_INSERT_1]], ptr [[VAL_FCA_ALLOCA]], align 1 ; CHECK-NEXT: [[VAL_FCA_ALLOCA_LOAD:%.*]] = load i64, ptr [[VAL_FCA_ALLOCA]], align 8 @@ -133,9 +133,9 @@ define i64 @load_i64_dyn(i32 %i) { ; CHECK-LABEL: define i64 @load_i64_dyn( ; CHECK-SAME: i32 [[I:%.*]]) { ; CHECK-NEXT: [[VAL_FCA_ALLOCA:%.*]] = alloca <{ i32, i32 }>, align 8 -; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(21) @registerbuffer.getpointer.a15i32(ptr addrspace(20) @GLOBAL) -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP1]], i32 -15 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [20 x i32], ptr addrspace(21) [[TMP2]], i32 0, i32 [[I]] +; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @registerbuffer.getpointer.a15i32(ptr addrspace(20) @GLOBAL) +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr addrspace(32) [[TMP1]], i32 -15 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [20 x i32], ptr addrspace(32) [[TMP2]], i32 0, i32 [[I]] ; CHECK-NEXT: [[ADDR:%.*]] = getelementptr [20 x i32], ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i32 0, i32 [[I]] ; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(20) ; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr addrspace(20) [[TMP4]] to i32 @@ -146,12 +146,12 @@ define i64 @load_i64_dyn(i32 
%i) { ; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) [[TMP4]], align 4 ; CHECK-NEXT: br label [[TMP12:%.*]] ; CHECK: 10: -; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(21) [[TMP3]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(32) [[TMP3]], align 4 ; CHECK-NEXT: br label [[TMP12]] ; CHECK: 12: ; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ [[TMP9]], [[TMP8]] ], [ [[TMP11]], [[TMP10]] ] ; CHECK-NEXT: [[VAL_FCA_INSERT_0:%.*]] = insertvalue <{ i32, i32 }> poison, i32 [[TMP13]], 0 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr <{ i32, i32 }>, ptr addrspace(21) [[TMP3]], i32 0, i32 1 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr <{ i32, i32 }>, ptr addrspace(32) [[TMP3]], i32 0, i32 1 ; CHECK-NEXT: [[VAL_FCA_GEP_1:%.*]] = getelementptr inbounds <{ i32, i32 }>, ptr [[ADDR]], i32 0, i32 1 ; CHECK-NEXT: [[TMP15:%.*]] = addrspacecast ptr [[VAL_FCA_GEP_1]] to ptr addrspace(20) ; CHECK-NEXT: [[TMP16:%.*]] = ptrtoint ptr addrspace(20) [[TMP15]] to i32 @@ -162,7 +162,7 @@ define i64 @load_i64_dyn(i32 %i) { ; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(20) [[TMP15]], align 4 ; CHECK-NEXT: br label [[TMP23:%.*]] ; CHECK: 21: -; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(21) [[TMP14]], align 4 +; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(32) [[TMP14]], align 4 ; CHECK-NEXT: br label [[TMP23]] ; CHECK: 23: ; CHECK-NEXT: [[TMP24:%.*]] = phi i32 [ [[TMP20]], [[TMP19]] ], [ [[TMP22]], [[TMP21]] ] @@ -202,10 +202,10 @@ define i32 @store_i32_reg2(i32 %val) { define i32 @store_i32_mem(i32 %val) { ; CHECK-LABEL: define i32 @store_i32_mem( ; CHECK-SAME: i32 [[VAL:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(21) @registerbuffer.getpointer.a15i32(ptr addrspace(20) @GLOBAL) -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP1]], i32 -15 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [20 x i32], ptr addrspace(21) [[TMP2]], i32 0, i32 15 -; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(21) [[TMP3]], align 4 +; 
CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @registerbuffer.getpointer.a15i32(ptr addrspace(20) @GLOBAL) +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr addrspace(32) [[TMP1]], i32 -15 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [20 x i32], ptr addrspace(32) [[TMP2]], i32 0, i32 15 +; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(32) [[TMP3]], align 4 ; CHECK-NEXT: ret i32 [[VAL]] ; %addr = getelementptr [20 x i32], [20 x i32]* @GLOBAL, i32 0, i32 15 @@ -216,9 +216,9 @@ define i32 @store_i32_mem(i32 %val) { define i32 @store_i32_dyn(i32 %val, i32 %i) { ; CHECK-LABEL: define i32 @store_i32_dyn( ; CHECK-SAME: i32 [[VAL:%.*]], i32 [[I:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(21) @registerbuffer.getpointer.a15i32(ptr addrspace(20) @GLOBAL) -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP1]], i32 -15 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [20 x i32], ptr addrspace(21) [[TMP2]], i32 0, i32 [[I]] +; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @registerbuffer.getpointer.a15i32(ptr addrspace(20) @GLOBAL) +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr addrspace(32) [[TMP1]], i32 -15 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [20 x i32], ptr addrspace(32) [[TMP2]], i32 0, i32 [[I]] ; CHECK-NEXT: [[ADDR:%.*]] = getelementptr [20 x i32], ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i32 0, i32 [[I]] ; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(20) ; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr addrspace(20) [[TMP4]] to i32 @@ -229,7 +229,7 @@ define i32 @store_i32_dyn(i32 %val, i32 %i) { ; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(20) [[TMP4]], align 4 ; CHECK-NEXT: br label [[TMP10:%.*]] ; CHECK: 9: -; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(21) [[TMP3]], align 4 +; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(32) [[TMP3]], align 4 ; CHECK-NEXT: br label [[TMP10]] ; CHECK: 10: ; CHECK-NEXT: ret i32 [[VAL]] @@ -282,19 +282,19 @@ define i32 @load_unaligned_i32_reg2() { 
define i32 @load_unaligned_i32_mem() { ; CHECK-LABEL: define i32 @load_unaligned_i32_mem() { ; CHECK-NEXT: [[VAL_FCA_ALLOCA:%.*]] = alloca <{ i8, i8, i8, i8 }>, align 8 -; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(21) @registerbuffer.getpointer.a15i32(ptr addrspace(20) @GLOBAL) -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP1]], i32 -15 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [20 x i32], ptr addrspace(21) [[TMP2]], i32 0, i32 15 -; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr addrspace(21) [[TMP3]], align 2 +; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @registerbuffer.getpointer.a15i32(ptr addrspace(20) @GLOBAL) +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr addrspace(32) [[TMP1]], i32 -15 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [20 x i32], ptr addrspace(32) [[TMP2]], i32 0, i32 15 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr addrspace(32) [[TMP3]], align 2 ; CHECK-NEXT: [[VAL_FCA_INSERT_0:%.*]] = insertvalue <{ i8, i8, i8, i8 }> poison, i8 [[TMP4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr <{ i8, i8, i8, i8 }>, ptr addrspace(21) [[TMP3]], i32 0, i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr addrspace(21) [[TMP5]], align 1 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr <{ i8, i8, i8, i8 }>, ptr addrspace(32) [[TMP3]], i32 0, i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr addrspace(32) [[TMP5]], align 1 ; CHECK-NEXT: [[VAL_FCA_INSERT_1:%.*]] = insertvalue <{ i8, i8, i8, i8 }> [[VAL_FCA_INSERT_0]], i8 [[TMP6]], 1 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr <{ i8, i8, i8, i8 }>, ptr addrspace(21) [[TMP3]], i32 0, i32 2 -; CHECK-NEXT: [[TMP8:%.*]] = load i8, ptr addrspace(21) [[TMP7]], align 2 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr <{ i8, i8, i8, i8 }>, ptr addrspace(32) [[TMP3]], i32 0, i32 2 +; CHECK-NEXT: [[TMP8:%.*]] = load i8, ptr addrspace(32) [[TMP7]], align 2 ; CHECK-NEXT: [[VAL_FCA_INSERT_2:%.*]] = insertvalue <{ i8, i8, i8, i8 }> [[VAL_FCA_INSERT_1]], i8 [[TMP8]], 2 -; CHECK-NEXT: [[TMP9:%.*]] = 
getelementptr <{ i8, i8, i8, i8 }>, ptr addrspace(21) [[TMP3]], i32 0, i32 3 -; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr addrspace(21) [[TMP9]], align 1 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr <{ i8, i8, i8, i8 }>, ptr addrspace(32) [[TMP3]], i32 0, i32 3 +; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr addrspace(32) [[TMP9]], align 1 ; CHECK-NEXT: [[VAL_FCA_INSERT_3:%.*]] = insertvalue <{ i8, i8, i8, i8 }> [[VAL_FCA_INSERT_2]], i8 [[TMP10]], 3 ; CHECK-NEXT: store <{ i8, i8, i8, i8 }> [[VAL_FCA_INSERT_3]], ptr [[VAL_FCA_ALLOCA]], align 1 ; CHECK-NEXT: [[VAL_FCA_ALLOCA_LOAD:%.*]] = load i32, ptr [[VAL_FCA_ALLOCA]], align 8 @@ -309,9 +309,9 @@ define i32 @load_unaligned_i32_dyn(i32 %i) { ; CHECK-LABEL: define i32 @load_unaligned_i32_dyn( ; CHECK-SAME: i32 [[I:%.*]]) { ; CHECK-NEXT: [[VAL_FCA_ALLOCA:%.*]] = alloca <{ i8, i8, i8, i8 }>, align 8 -; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(21) @registerbuffer.getpointer.a15i32(ptr addrspace(20) @GLOBAL) -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP1]], i32 -15 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [20 x i32], ptr addrspace(21) [[TMP2]], i32 0, i32 [[I]] +; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @registerbuffer.getpointer.a15i32(ptr addrspace(20) @GLOBAL) +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr addrspace(32) [[TMP1]], i32 -15 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [20 x i32], ptr addrspace(32) [[TMP2]], i32 0, i32 [[I]] ; CHECK-NEXT: [[ADDR:%.*]] = getelementptr [20 x i32], ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i32 0, i32 [[I]] ; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(20) ; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr addrspace(20) [[TMP4]] to i32 @@ -322,12 +322,12 @@ define i32 @load_unaligned_i32_dyn(i32 %i) { ; CHECK-NEXT: [[TMP9:%.*]] = load i8, ptr addrspace(20) [[TMP4]], align 2 ; CHECK-NEXT: br label [[TMP12:%.*]] ; CHECK: 10: -; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr addrspace(21) [[TMP3]], align 2 +; 
CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr addrspace(32) [[TMP3]], align 2 ; CHECK-NEXT: br label [[TMP12]] ; CHECK: 12: ; CHECK-NEXT: [[TMP13:%.*]] = phi i8 [ [[TMP9]], [[TMP8]] ], [ [[TMP11]], [[TMP10]] ] ; CHECK-NEXT: [[VAL_FCA_INSERT_0:%.*]] = insertvalue <{ i8, i8, i8, i8 }> poison, i8 [[TMP13]], 0 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr <{ i8, i8, i8, i8 }>, ptr addrspace(21) [[TMP3]], i32 0, i32 1 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr <{ i8, i8, i8, i8 }>, ptr addrspace(32) [[TMP3]], i32 0, i32 1 ; CHECK-NEXT: [[VAL_FCA_GEP_1:%.*]] = getelementptr inbounds <{ i8, i8, i8, i8 }>, ptr [[ADDR]], i32 0, i32 1 ; CHECK-NEXT: [[TMP15:%.*]] = addrspacecast ptr [[VAL_FCA_GEP_1]] to ptr addrspace(20) ; CHECK-NEXT: [[TMP16:%.*]] = ptrtoint ptr addrspace(20) [[TMP15]] to i32 @@ -338,12 +338,12 @@ define i32 @load_unaligned_i32_dyn(i32 %i) { ; CHECK-NEXT: [[TMP20:%.*]] = load i8, ptr addrspace(20) [[TMP15]], align 1 ; CHECK-NEXT: br label [[TMP23:%.*]] ; CHECK: 21: -; CHECK-NEXT: [[TMP22:%.*]] = load i8, ptr addrspace(21) [[TMP14]], align 1 +; CHECK-NEXT: [[TMP22:%.*]] = load i8, ptr addrspace(32) [[TMP14]], align 1 ; CHECK-NEXT: br label [[TMP23]] ; CHECK: 23: ; CHECK-NEXT: [[TMP24:%.*]] = phi i8 [ [[TMP20]], [[TMP19]] ], [ [[TMP22]], [[TMP21]] ] ; CHECK-NEXT: [[VAL_FCA_INSERT_1:%.*]] = insertvalue <{ i8, i8, i8, i8 }> [[VAL_FCA_INSERT_0]], i8 [[TMP24]], 1 -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr <{ i8, i8, i8, i8 }>, ptr addrspace(21) [[TMP3]], i32 0, i32 2 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr <{ i8, i8, i8, i8 }>, ptr addrspace(32) [[TMP3]], i32 0, i32 2 ; CHECK-NEXT: [[VAL_FCA_GEP_2:%.*]] = getelementptr inbounds <{ i8, i8, i8, i8 }>, ptr [[ADDR]], i32 0, i32 2 ; CHECK-NEXT: [[TMP26:%.*]] = addrspacecast ptr [[VAL_FCA_GEP_2]] to ptr addrspace(20) ; CHECK-NEXT: [[TMP27:%.*]] = ptrtoint ptr addrspace(20) [[TMP26]] to i32 @@ -354,12 +354,12 @@ define i32 @load_unaligned_i32_dyn(i32 %i) { ; CHECK-NEXT: [[TMP31:%.*]] = load i8, ptr addrspace(20) 
[[TMP26]], align 2 ; CHECK-NEXT: br label [[TMP34:%.*]] ; CHECK: 32: -; CHECK-NEXT: [[TMP33:%.*]] = load i8, ptr addrspace(21) [[TMP25]], align 2 +; CHECK-NEXT: [[TMP33:%.*]] = load i8, ptr addrspace(32) [[TMP25]], align 2 ; CHECK-NEXT: br label [[TMP34]] ; CHECK: 34: ; CHECK-NEXT: [[TMP35:%.*]] = phi i8 [ [[TMP31]], [[TMP30]] ], [ [[TMP33]], [[TMP32]] ] ; CHECK-NEXT: [[VAL_FCA_INSERT_2:%.*]] = insertvalue <{ i8, i8, i8, i8 }> [[VAL_FCA_INSERT_1]], i8 [[TMP35]], 2 -; CHECK-NEXT: [[TMP36:%.*]] = getelementptr <{ i8, i8, i8, i8 }>, ptr addrspace(21) [[TMP3]], i32 0, i32 3 +; CHECK-NEXT: [[TMP36:%.*]] = getelementptr <{ i8, i8, i8, i8 }>, ptr addrspace(32) [[TMP3]], i32 0, i32 3 ; CHECK-NEXT: [[VAL_FCA_GEP_3:%.*]] = getelementptr inbounds <{ i8, i8, i8, i8 }>, ptr [[ADDR]], i32 0, i32 3 ; CHECK-NEXT: [[TMP37:%.*]] = addrspacecast ptr [[VAL_FCA_GEP_3]] to ptr addrspace(20) ; CHECK-NEXT: [[TMP38:%.*]] = ptrtoint ptr addrspace(20) [[TMP37]] to i32 @@ -370,7 +370,7 @@ define i32 @load_unaligned_i32_dyn(i32 %i) { ; CHECK-NEXT: [[TMP42:%.*]] = load i8, ptr addrspace(20) [[TMP37]], align 1 ; CHECK-NEXT: br label [[TMP45:%.*]] ; CHECK: 43: -; CHECK-NEXT: [[TMP44:%.*]] = load i8, ptr addrspace(21) [[TMP36]], align 1 +; CHECK-NEXT: [[TMP44:%.*]] = load i8, ptr addrspace(32) [[TMP36]], align 1 ; CHECK-NEXT: br label [[TMP45]] ; CHECK: 45: ; CHECK-NEXT: [[TMP46:%.*]] = phi i8 [ [[TMP42]], [[TMP41]] ], [ [[TMP44]], [[TMP43]] ] @@ -430,9 +430,9 @@ define i32 @store_unaligned_i32_dyn(i32 %val, i32 %i) { ; CHECK-LABEL: define i32 @store_unaligned_i32_dyn( ; CHECK-SAME: i32 [[VAL:%.*]], i32 [[I:%.*]]) { ; CHECK-NEXT: [[VAL_FCA_ALLOCA:%.*]] = alloca <{ i8, i8, i8, i8 }>, align 8 -; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(21) @registerbuffer.getpointer.a15i32(ptr addrspace(20) @GLOBAL) -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP1]], i32 -15 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [20 x i32], ptr addrspace(21) [[TMP2]], i32 0, i32 [[I]] +; 
CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @registerbuffer.getpointer.a15i32(ptr addrspace(20) @GLOBAL) +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr addrspace(32) [[TMP1]], i32 -15 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [20 x i32], ptr addrspace(32) [[TMP2]], i32 0, i32 [[I]] ; CHECK-NEXT: [[ADDR:%.*]] = getelementptr [20 x i32], ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i32 0, i32 [[I]] ; CHECK-NEXT: store i32 [[VAL]], ptr [[VAL_FCA_ALLOCA]], align 8 ; CHECK-NEXT: [[VAL_FCA_ALLOCA_LOAD:%.*]] = load <{ i8, i8, i8, i8 }>, ptr [[VAL_FCA_ALLOCA]], align 1 @@ -446,10 +446,10 @@ define i32 @store_unaligned_i32_dyn(i32 %val, i32 %i) { ; CHECK-NEXT: store i8 [[VAL_FCA_EXTRACT_0]], ptr addrspace(20) [[TMP4]], align 2 ; CHECK-NEXT: br label [[TMP10:%.*]] ; CHECK: 9: -; CHECK-NEXT: store i8 [[VAL_FCA_EXTRACT_0]], ptr addrspace(21) [[TMP3]], align 2 +; CHECK-NEXT: store i8 [[VAL_FCA_EXTRACT_0]], ptr addrspace(32) [[TMP3]], align 2 ; CHECK-NEXT: br label [[TMP10]] ; CHECK: 10: -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr <{ i8, i8, i8, i8 }>, ptr addrspace(21) [[TMP3]], i32 0, i32 1 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr <{ i8, i8, i8, i8 }>, ptr addrspace(32) [[TMP3]], i32 0, i32 1 ; CHECK-NEXT: [[VAL_FCA_GEP_1:%.*]] = getelementptr inbounds <{ i8, i8, i8, i8 }>, ptr [[ADDR]], i32 0, i32 1 ; CHECK-NEXT: [[VAL_FCA_EXTRACT_1:%.*]] = extractvalue <{ i8, i8, i8, i8 }> [[VAL_FCA_ALLOCA_LOAD]], 1 ; CHECK-NEXT: [[TMP12:%.*]] = addrspacecast ptr [[VAL_FCA_GEP_1]] to ptr addrspace(20) @@ -461,10 +461,10 @@ define i32 @store_unaligned_i32_dyn(i32 %val, i32 %i) { ; CHECK-NEXT: store i8 [[VAL_FCA_EXTRACT_1]], ptr addrspace(20) [[TMP12]], align 1 ; CHECK-NEXT: br label [[TMP18:%.*]] ; CHECK: 17: -; CHECK-NEXT: store i8 [[VAL_FCA_EXTRACT_1]], ptr addrspace(21) [[TMP11]], align 1 +; CHECK-NEXT: store i8 [[VAL_FCA_EXTRACT_1]], ptr addrspace(32) [[TMP11]], align 1 ; CHECK-NEXT: br label [[TMP18]] ; CHECK: 18: -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr 
<{ i8, i8, i8, i8 }>, ptr addrspace(21) [[TMP3]], i32 0, i32 2 +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr <{ i8, i8, i8, i8 }>, ptr addrspace(32) [[TMP3]], i32 0, i32 2 ; CHECK-NEXT: [[VAL_FCA_GEP_2:%.*]] = getelementptr inbounds <{ i8, i8, i8, i8 }>, ptr [[ADDR]], i32 0, i32 2 ; CHECK-NEXT: [[VAL_FCA_EXTRACT_2:%.*]] = extractvalue <{ i8, i8, i8, i8 }> [[VAL_FCA_ALLOCA_LOAD]], 2 ; CHECK-NEXT: [[TMP20:%.*]] = addrspacecast ptr [[VAL_FCA_GEP_2]] to ptr addrspace(20) @@ -476,10 +476,10 @@ define i32 @store_unaligned_i32_dyn(i32 %val, i32 %i) { ; CHECK-NEXT: store i8 [[VAL_FCA_EXTRACT_2]], ptr addrspace(20) [[TMP20]], align 2 ; CHECK-NEXT: br label [[TMP26:%.*]] ; CHECK: 25: -; CHECK-NEXT: store i8 [[VAL_FCA_EXTRACT_2]], ptr addrspace(21) [[TMP19]], align 2 +; CHECK-NEXT: store i8 [[VAL_FCA_EXTRACT_2]], ptr addrspace(32) [[TMP19]], align 2 ; CHECK-NEXT: br label [[TMP26]] ; CHECK: 26: -; CHECK-NEXT: [[TMP27:%.*]] = getelementptr <{ i8, i8, i8, i8 }>, ptr addrspace(21) [[TMP3]], i32 0, i32 3 +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr <{ i8, i8, i8, i8 }>, ptr addrspace(32) [[TMP3]], i32 0, i32 3 ; CHECK-NEXT: [[VAL_FCA_GEP_3:%.*]] = getelementptr inbounds <{ i8, i8, i8, i8 }>, ptr [[ADDR]], i32 0, i32 3 ; CHECK-NEXT: [[VAL_FCA_EXTRACT_3:%.*]] = extractvalue <{ i8, i8, i8, i8 }> [[VAL_FCA_ALLOCA_LOAD]], 3 ; CHECK-NEXT: [[TMP28:%.*]] = addrspacecast ptr [[VAL_FCA_GEP_3]] to ptr addrspace(20) @@ -491,7 +491,7 @@ define i32 @store_unaligned_i32_dyn(i32 %val, i32 %i) { ; CHECK-NEXT: store i8 [[VAL_FCA_EXTRACT_3]], ptr addrspace(20) [[TMP28]], align 1 ; CHECK-NEXT: br label [[TMP34:%.*]] ; CHECK: 33: -; CHECK-NEXT: store i8 [[VAL_FCA_EXTRACT_3]], ptr addrspace(21) [[TMP27]], align 1 +; CHECK-NEXT: store i8 [[VAL_FCA_EXTRACT_3]], ptr addrspace(32) [[TMP27]], align 1 ; CHECK-NEXT: br label [[TMP34]] ; CHECK: 34: ; CHECK-NEXT: ret i32 [[VAL]] @@ -533,25 +533,25 @@ define %complex_type @load_struct_mem() { ; CHECK-LABEL: define %complex_type @load_struct_mem() { ; CHECK-NEXT: 
[[VAL_FCA_0_ALLOCA:%.*]] = alloca <{ i32, i32 }>, align 8 ; CHECK-NEXT: [[VAL_FCA_2_ALLOCA:%.*]] = alloca <{ i32, i32 }>, align 8 -; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(21) @registerbuffer.getpointer.a15i32(ptr addrspace(20) @GLOBAL) -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP1]], i32 -15 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [20 x i32], ptr addrspace(21) [[TMP2]], i32 0, i32 15 -; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(21) [[TMP3]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @registerbuffer.getpointer.a15i32(ptr addrspace(20) @GLOBAL) +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr addrspace(32) [[TMP1]], i32 -15 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [20 x i32], ptr addrspace(32) [[TMP2]], i32 0, i32 15 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(32) [[TMP3]], align 4 ; CHECK-NEXT: [[VAL_FCA_0_INSERT_0:%.*]] = insertvalue <{ i32, i32 }> poison, i32 [[TMP4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr <{ i32, i32 }>, ptr addrspace(21) [[TMP3]], i32 0, i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(21) [[TMP5]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr <{ i32, i32 }>, ptr addrspace(32) [[TMP3]], i32 0, i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(32) [[TMP5]], align 4 ; CHECK-NEXT: [[VAL_FCA_0_INSERT_1:%.*]] = insertvalue <{ i32, i32 }> [[VAL_FCA_0_INSERT_0]], i32 [[TMP6]], 1 ; CHECK-NEXT: store <{ i32, i32 }> [[VAL_FCA_0_INSERT_1]], ptr [[VAL_FCA_0_ALLOCA]], align 1 ; CHECK-NEXT: [[VAL_FCA_0_ALLOCA_LOAD:%.*]] = load ptr, ptr [[VAL_FCA_0_ALLOCA]], align 8 ; CHECK-NEXT: [[VAL_FCA_0_INSERT:%.*]] = insertvalue [[COMPLEX_TYPE:%.*]] poison, ptr [[VAL_FCA_0_ALLOCA_LOAD]], 0 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr [[COMPLEX_TYPE]], ptr addrspace(21) [[TMP3]], i32 0, i32 1 -; CHECK-NEXT: [[TMP8:%.*]] = load half, ptr addrspace(21) [[TMP7]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr [[COMPLEX_TYPE]], ptr addrspace(32) 
[[TMP3]], i32 0, i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = load half, ptr addrspace(32) [[TMP7]], align 4 ; CHECK-NEXT: [[VAL_FCA_1_INSERT:%.*]] = insertvalue [[COMPLEX_TYPE]] [[VAL_FCA_0_INSERT]], half [[TMP8]], 1 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr [[COMPLEX_TYPE]], ptr addrspace(21) [[TMP3]], i32 0, i32 2 -; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(21) [[TMP9]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr [[COMPLEX_TYPE]], ptr addrspace(32) [[TMP3]], i32 0, i32 2 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(32) [[TMP9]], align 4 ; CHECK-NEXT: [[VAL_FCA_2_INSERT_0:%.*]] = insertvalue <{ i32, i32 }> poison, i32 [[TMP10]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr <{ i32, i32 }>, ptr addrspace(21) [[TMP9]], i32 0, i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(21) [[TMP11]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr <{ i32, i32 }>, ptr addrspace(32) [[TMP9]], i32 0, i32 1 +; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(32) [[TMP11]], align 4 ; CHECK-NEXT: [[VAL_FCA_2_INSERT_1:%.*]] = insertvalue <{ i32, i32 }> [[VAL_FCA_2_INSERT_0]], i32 [[TMP12]], 1 ; CHECK-NEXT: store <{ i32, i32 }> [[VAL_FCA_2_INSERT_1]], ptr [[VAL_FCA_2_ALLOCA]], align 1 ; CHECK-NEXT: [[VAL_FCA_2_ALLOCA_LOAD:%.*]] = load ptr addrspace(1), ptr [[VAL_FCA_2_ALLOCA]], align 8 @@ -568,9 +568,9 @@ define %complex_type @load_struct_both() { ; CHECK-LABEL: define %complex_type @load_struct_both() { ; CHECK-NEXT: [[VAL_FCA_0_ALLOCA:%.*]] = alloca <{ i32, i32 }>, align 8 ; CHECK-NEXT: [[VAL_FCA_2_ALLOCA:%.*]] = alloca <{ i32, i32 }>, align 8 -; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(21) @registerbuffer.getpointer.a15i32(ptr addrspace(20) @GLOBAL) -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP1]], i32 -15 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [20 x i32], ptr addrspace(21) [[TMP2]], i32 0, i32 13 +; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @registerbuffer.getpointer.a15i32(ptr addrspace(20) 
@GLOBAL) +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr addrspace(32) [[TMP1]], i32 -15 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [20 x i32], ptr addrspace(32) [[TMP2]], i32 0, i32 13 ; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i64 52) to ptr addrspace(20)), align 4 ; CHECK-NEXT: [[VAL_FCA_0_INSERT_0:%.*]] = insertvalue <{ i32, i32 }> poison, i32 [[TMP4]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i64 56) to ptr addrspace(20)), align 4 @@ -578,14 +578,14 @@ define %complex_type @load_struct_both() { ; CHECK-NEXT: store <{ i32, i32 }> [[VAL_FCA_0_INSERT_1]], ptr [[VAL_FCA_0_ALLOCA]], align 1 ; CHECK-NEXT: [[VAL_FCA_0_ALLOCA_LOAD:%.*]] = load ptr, ptr [[VAL_FCA_0_ALLOCA]], align 8 ; CHECK-NEXT: [[VAL_FCA_0_INSERT:%.*]] = insertvalue [[COMPLEX_TYPE:%.*]] poison, ptr [[VAL_FCA_0_ALLOCA_LOAD]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr [[COMPLEX_TYPE]], ptr addrspace(21) [[TMP3]], i32 0, i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = load half, ptr addrspace(21) [[TMP6]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr [[COMPLEX_TYPE]], ptr addrspace(32) [[TMP3]], i32 0, i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = load half, ptr addrspace(32) [[TMP6]], align 4 ; CHECK-NEXT: [[VAL_FCA_1_INSERT:%.*]] = insertvalue [[COMPLEX_TYPE]] [[VAL_FCA_0_INSERT]], half [[TMP7]], 1 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr [[COMPLEX_TYPE]], ptr addrspace(21) [[TMP3]], i32 0, i32 2 -; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(21) [[TMP8]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr [[COMPLEX_TYPE]], ptr addrspace(32) [[TMP3]], i32 0, i32 2 +; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(32) [[TMP8]], align 4 ; CHECK-NEXT: [[VAL_FCA_2_INSERT_0:%.*]] = insertvalue <{ i32, i32 }> poison, i32 [[TMP9]], 0 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr <{ i32, 
i32 }>, ptr addrspace(21) [[TMP8]], i32 0, i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(21) [[TMP10]], align 4 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr <{ i32, i32 }>, ptr addrspace(32) [[TMP8]], i32 0, i32 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(32) [[TMP10]], align 4 ; CHECK-NEXT: [[VAL_FCA_2_INSERT_1:%.*]] = insertvalue <{ i32, i32 }> [[VAL_FCA_2_INSERT_0]], i32 [[TMP11]], 1 ; CHECK-NEXT: store <{ i32, i32 }> [[VAL_FCA_2_INSERT_1]], ptr [[VAL_FCA_2_ALLOCA]], align 1 ; CHECK-NEXT: [[VAL_FCA_2_ALLOCA_LOAD:%.*]] = load ptr addrspace(1), ptr [[VAL_FCA_2_ALLOCA]], align 8 @@ -603,9 +603,9 @@ define %complex_type @load_struct_dyn(i32 %i) { ; CHECK-SAME: i32 [[I:%.*]]) { ; CHECK-NEXT: [[VAL_FCA_0_ALLOCA:%.*]] = alloca <{ i32, i32 }>, align 8 ; CHECK-NEXT: [[VAL_FCA_2_ALLOCA:%.*]] = alloca <{ i32, i32 }>, align 8 -; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(21) @registerbuffer.getpointer.a15i32(ptr addrspace(20) @GLOBAL) -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP1]], i32 -15 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [20 x i32], ptr addrspace(21) [[TMP2]], i32 0, i32 [[I]] +; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @registerbuffer.getpointer.a15i32(ptr addrspace(20) @GLOBAL) +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr addrspace(32) [[TMP1]], i32 -15 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [20 x i32], ptr addrspace(32) [[TMP2]], i32 0, i32 [[I]] ; CHECK-NEXT: [[ADDR:%.*]] = getelementptr [20 x i32], ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i32 0, i32 [[I]] ; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(20) ; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr addrspace(20) [[TMP4]] to i32 @@ -616,12 +616,12 @@ define %complex_type @load_struct_dyn(i32 %i) { ; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) [[TMP4]], align 4 ; CHECK-NEXT: br label [[TMP12:%.*]] ; CHECK: 10: -; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(21) [[TMP3]], 
align 4 +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(32) [[TMP3]], align 4 ; CHECK-NEXT: br label [[TMP12]] ; CHECK: 12: ; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ [[TMP9]], [[TMP8]] ], [ [[TMP11]], [[TMP10]] ] ; CHECK-NEXT: [[VAL_FCA_0_INSERT_0:%.*]] = insertvalue <{ i32, i32 }> poison, i32 [[TMP13]], 0 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr <{ i32, i32 }>, ptr addrspace(21) [[TMP3]], i32 0, i32 1 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr <{ i32, i32 }>, ptr addrspace(32) [[TMP3]], i32 0, i32 1 ; CHECK-NEXT: [[VAL_FCA_0_GEP_1:%.*]] = getelementptr inbounds <{ i32, i32 }>, ptr [[ADDR]], i32 0, i32 1 ; CHECK-NEXT: [[TMP15:%.*]] = addrspacecast ptr [[VAL_FCA_0_GEP_1]] to ptr addrspace(20) ; CHECK-NEXT: [[TMP16:%.*]] = ptrtoint ptr addrspace(20) [[TMP15]] to i32 @@ -632,7 +632,7 @@ define %complex_type @load_struct_dyn(i32 %i) { ; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(20) [[TMP15]], align 4 ; CHECK-NEXT: br label [[TMP23:%.*]] ; CHECK: 21: -; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(21) [[TMP14]], align 4 +; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(32) [[TMP14]], align 4 ; CHECK-NEXT: br label [[TMP23]] ; CHECK: 23: ; CHECK-NEXT: [[TMP24:%.*]] = phi i32 [ [[TMP20]], [[TMP19]] ], [ [[TMP22]], [[TMP21]] ] @@ -640,7 +640,7 @@ define %complex_type @load_struct_dyn(i32 %i) { ; CHECK-NEXT: store <{ i32, i32 }> [[VAL_FCA_0_INSERT_1]], ptr [[VAL_FCA_0_ALLOCA]], align 1 ; CHECK-NEXT: [[VAL_FCA_0_ALLOCA_LOAD:%.*]] = load ptr, ptr [[VAL_FCA_0_ALLOCA]], align 8 ; CHECK-NEXT: [[VAL_FCA_0_INSERT:%.*]] = insertvalue [[COMPLEX_TYPE:%.*]] poison, ptr [[VAL_FCA_0_ALLOCA_LOAD]], 0 -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr [[COMPLEX_TYPE]], ptr addrspace(21) [[TMP3]], i32 0, i32 1 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr [[COMPLEX_TYPE]], ptr addrspace(32) [[TMP3]], i32 0, i32 1 ; CHECK-NEXT: [[VAL_FCA_1_GEP:%.*]] = getelementptr inbounds [[COMPLEX_TYPE]], ptr [[ADDR]], i32 0, i32 1 ; CHECK-NEXT: [[TMP26:%.*]] = addrspacecast ptr 
[[VAL_FCA_1_GEP]] to ptr addrspace(20) ; CHECK-NEXT: [[TMP27:%.*]] = ptrtoint ptr addrspace(20) [[TMP26]] to i32 @@ -651,12 +651,12 @@ define %complex_type @load_struct_dyn(i32 %i) { ; CHECK-NEXT: [[TMP31:%.*]] = load half, ptr addrspace(20) [[TMP26]], align 4 ; CHECK-NEXT: br label [[TMP34:%.*]] ; CHECK: 32: -; CHECK-NEXT: [[TMP33:%.*]] = load half, ptr addrspace(21) [[TMP25]], align 4 +; CHECK-NEXT: [[TMP33:%.*]] = load half, ptr addrspace(32) [[TMP25]], align 4 ; CHECK-NEXT: br label [[TMP34]] ; CHECK: 34: ; CHECK-NEXT: [[TMP35:%.*]] = phi half [ [[TMP31]], [[TMP30]] ], [ [[TMP33]], [[TMP32]] ] ; CHECK-NEXT: [[VAL_FCA_1_INSERT:%.*]] = insertvalue [[COMPLEX_TYPE]] [[VAL_FCA_0_INSERT]], half [[TMP35]], 1 -; CHECK-NEXT: [[TMP36:%.*]] = getelementptr [[COMPLEX_TYPE]], ptr addrspace(21) [[TMP3]], i32 0, i32 2 +; CHECK-NEXT: [[TMP36:%.*]] = getelementptr [[COMPLEX_TYPE]], ptr addrspace(32) [[TMP3]], i32 0, i32 2 ; CHECK-NEXT: [[VAL_FCA_2_GEP:%.*]] = getelementptr inbounds [[COMPLEX_TYPE]], ptr [[ADDR]], i32 0, i32 2 ; CHECK-NEXT: [[TMP37:%.*]] = addrspacecast ptr [[VAL_FCA_2_GEP]] to ptr addrspace(20) ; CHECK-NEXT: [[TMP38:%.*]] = ptrtoint ptr addrspace(20) [[TMP37]] to i32 @@ -667,12 +667,12 @@ define %complex_type @load_struct_dyn(i32 %i) { ; CHECK-NEXT: [[TMP42:%.*]] = load i32, ptr addrspace(20) [[TMP37]], align 4 ; CHECK-NEXT: br label [[TMP45:%.*]] ; CHECK: 43: -; CHECK-NEXT: [[TMP44:%.*]] = load i32, ptr addrspace(21) [[TMP36]], align 4 +; CHECK-NEXT: [[TMP44:%.*]] = load i32, ptr addrspace(32) [[TMP36]], align 4 ; CHECK-NEXT: br label [[TMP45]] ; CHECK: 45: ; CHECK-NEXT: [[TMP46:%.*]] = phi i32 [ [[TMP42]], [[TMP41]] ], [ [[TMP44]], [[TMP43]] ] ; CHECK-NEXT: [[VAL_FCA_2_INSERT_0:%.*]] = insertvalue <{ i32, i32 }> poison, i32 [[TMP46]], 0 -; CHECK-NEXT: [[TMP47:%.*]] = getelementptr <{ i32, i32 }>, ptr addrspace(21) [[TMP36]], i32 0, i32 1 +; CHECK-NEXT: [[TMP47:%.*]] = getelementptr <{ i32, i32 }>, ptr addrspace(32) [[TMP36]], i32 0, i32 1 ; CHECK-NEXT: 
[[VAL_FCA_2_GEP_1:%.*]] = getelementptr inbounds <{ i32, i32 }>, ptr [[VAL_FCA_2_GEP]], i32 0, i32 1 ; CHECK-NEXT: [[TMP48:%.*]] = addrspacecast ptr [[VAL_FCA_2_GEP_1]] to ptr addrspace(20) ; CHECK-NEXT: [[TMP49:%.*]] = ptrtoint ptr addrspace(20) [[TMP48]] to i32 @@ -683,7 +683,7 @@ define %complex_type @load_struct_dyn(i32 %i) { ; CHECK-NEXT: [[TMP53:%.*]] = load i32, ptr addrspace(20) [[TMP48]], align 4 ; CHECK-NEXT: br label [[TMP56:%.*]] ; CHECK: 54: -; CHECK-NEXT: [[TMP55:%.*]] = load i32, ptr addrspace(21) [[TMP47]], align 4 +; CHECK-NEXT: [[TMP55:%.*]] = load i32, ptr addrspace(32) [[TMP47]], align 4 ; CHECK-NEXT: br label [[TMP56]] ; CHECK: 56: ; CHECK-NEXT: [[TMP57:%.*]] = phi i32 [ [[TMP53]], [[TMP52]] ], [ [[TMP55]], [[TMP54]] ] @@ -733,29 +733,29 @@ define %complex_type @store_struct_mem(%complex_type %val) { ; CHECK-SAME: [[COMPLEX_TYPE:%.*]] [[VAL:%.*]]) { ; CHECK-NEXT: [[VAL_FCA_0_ALLOCA:%.*]] = alloca <{ i32, i32 }>, align 8 ; CHECK-NEXT: [[VAL_FCA_2_ALLOCA:%.*]] = alloca <{ i32, i32 }>, align 8 -; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(21) @registerbuffer.getpointer.a15i32(ptr addrspace(20) @GLOBAL) -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP1]], i32 -15 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [20 x i32], ptr addrspace(21) [[TMP2]], i32 0, i32 15 +; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @registerbuffer.getpointer.a15i32(ptr addrspace(20) @GLOBAL) +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr addrspace(32) [[TMP1]], i32 -15 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [20 x i32], ptr addrspace(32) [[TMP2]], i32 0, i32 15 ; CHECK-NEXT: [[VAL_FCA_0_EXTRACT:%.*]] = extractvalue [[COMPLEX_TYPE]] [[VAL]], 0 ; CHECK-NEXT: store ptr [[VAL_FCA_0_EXTRACT]], ptr [[VAL_FCA_0_ALLOCA]], align 8 ; CHECK-NEXT: [[VAL_FCA_0_ALLOCA_LOAD:%.*]] = load <{ i32, i32 }>, ptr [[VAL_FCA_0_ALLOCA]], align 1 ; CHECK-NEXT: [[VAL_FCA_0_EXTRACT_0:%.*]] = extractvalue <{ i32, i32 }> [[VAL_FCA_0_ALLOCA_LOAD]], 0 -; 
CHECK-NEXT: store i32 [[VAL_FCA_0_EXTRACT_0]], ptr addrspace(21) [[TMP3]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr <{ i32, i32 }>, ptr addrspace(21) [[TMP3]], i32 0, i32 1 +; CHECK-NEXT: store i32 [[VAL_FCA_0_EXTRACT_0]], ptr addrspace(32) [[TMP3]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr <{ i32, i32 }>, ptr addrspace(32) [[TMP3]], i32 0, i32 1 ; CHECK-NEXT: [[VAL_FCA_0_EXTRACT_1:%.*]] = extractvalue <{ i32, i32 }> [[VAL_FCA_0_ALLOCA_LOAD]], 1 -; CHECK-NEXT: store i32 [[VAL_FCA_0_EXTRACT_1]], ptr addrspace(21) [[TMP4]], align 4 +; CHECK-NEXT: store i32 [[VAL_FCA_0_EXTRACT_1]], ptr addrspace(32) [[TMP4]], align 4 ; CHECK-NEXT: [[VAL_FCA_1_EXTRACT:%.*]] = extractvalue [[COMPLEX_TYPE]] [[VAL]], 1 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr [[COMPLEX_TYPE]], ptr addrspace(21) [[TMP3]], i32 0, i32 1 -; CHECK-NEXT: store half [[VAL_FCA_1_EXTRACT]], ptr addrspace(21) [[TMP5]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr [[COMPLEX_TYPE]], ptr addrspace(32) [[TMP3]], i32 0, i32 1 +; CHECK-NEXT: store half [[VAL_FCA_1_EXTRACT]], ptr addrspace(32) [[TMP5]], align 4 ; CHECK-NEXT: [[VAL_FCA_2_EXTRACT:%.*]] = extractvalue [[COMPLEX_TYPE]] [[VAL]], 2 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr [[COMPLEX_TYPE]], ptr addrspace(21) [[TMP3]], i32 0, i32 2 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr [[COMPLEX_TYPE]], ptr addrspace(32) [[TMP3]], i32 0, i32 2 ; CHECK-NEXT: store ptr addrspace(1) [[VAL_FCA_2_EXTRACT]], ptr [[VAL_FCA_2_ALLOCA]], align 8 ; CHECK-NEXT: [[VAL_FCA_2_ALLOCA_LOAD:%.*]] = load <{ i32, i32 }>, ptr [[VAL_FCA_2_ALLOCA]], align 1 ; CHECK-NEXT: [[VAL_FCA_2_EXTRACT_0:%.*]] = extractvalue <{ i32, i32 }> [[VAL_FCA_2_ALLOCA_LOAD]], 0 -; CHECK-NEXT: store i32 [[VAL_FCA_2_EXTRACT_0]], ptr addrspace(21) [[TMP6]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr <{ i32, i32 }>, ptr addrspace(21) [[TMP6]], i32 0, i32 1 +; CHECK-NEXT: store i32 [[VAL_FCA_2_EXTRACT_0]], ptr addrspace(32) [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = 
getelementptr <{ i32, i32 }>, ptr addrspace(32) [[TMP6]], i32 0, i32 1 ; CHECK-NEXT: [[VAL_FCA_2_EXTRACT_1:%.*]] = extractvalue <{ i32, i32 }> [[VAL_FCA_2_ALLOCA_LOAD]], 1 -; CHECK-NEXT: store i32 [[VAL_FCA_2_EXTRACT_1]], ptr addrspace(21) [[TMP7]], align 4 +; CHECK-NEXT: store i32 [[VAL_FCA_2_EXTRACT_1]], ptr addrspace(32) [[TMP7]], align 4 ; CHECK-NEXT: ret [[COMPLEX_TYPE]] [[VAL]] ; %addr = getelementptr [20 x i32], [20 x i32]* @GLOBAL, i32 0, i32 15 @@ -769,9 +769,9 @@ define %complex_type @store_struct_both(%complex_type %val) { ; CHECK-SAME: [[COMPLEX_TYPE:%.*]] [[VAL:%.*]]) { ; CHECK-NEXT: [[VAL_FCA_0_ALLOCA:%.*]] = alloca <{ i32, i32 }>, align 8 ; CHECK-NEXT: [[VAL_FCA_2_ALLOCA:%.*]] = alloca <{ i32, i32 }>, align 8 -; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(21) @registerbuffer.getpointer.a15i32(ptr addrspace(20) @GLOBAL) -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP1]], i32 -15 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [20 x i32], ptr addrspace(21) [[TMP2]], i32 0, i32 13 +; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @registerbuffer.getpointer.a15i32(ptr addrspace(20) @GLOBAL) +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr addrspace(32) [[TMP1]], i32 -15 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [20 x i32], ptr addrspace(32) [[TMP2]], i32 0, i32 13 ; CHECK-NEXT: [[VAL_FCA_0_EXTRACT:%.*]] = extractvalue [[COMPLEX_TYPE]] [[VAL]], 0 ; CHECK-NEXT: store ptr [[VAL_FCA_0_EXTRACT]], ptr [[VAL_FCA_0_ALLOCA]], align 8 ; CHECK-NEXT: [[VAL_FCA_0_ALLOCA_LOAD:%.*]] = load <{ i32, i32 }>, ptr [[VAL_FCA_0_ALLOCA]], align 1 @@ -780,17 +780,17 @@ define %complex_type @store_struct_both(%complex_type %val) { ; CHECK-NEXT: [[VAL_FCA_0_EXTRACT_1:%.*]] = extractvalue <{ i32, i32 }> [[VAL_FCA_0_ALLOCA_LOAD]], 1 ; CHECK-NEXT: store i32 [[VAL_FCA_0_EXTRACT_1]], ptr addrspace(20) addrspacecast (ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i64 56) to ptr addrspace(20)), align 4 ; CHECK-NEXT: 
[[VAL_FCA_1_EXTRACT:%.*]] = extractvalue [[COMPLEX_TYPE]] [[VAL]], 1 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [[COMPLEX_TYPE]], ptr addrspace(21) [[TMP3]], i32 0, i32 1 -; CHECK-NEXT: store half [[VAL_FCA_1_EXTRACT]], ptr addrspace(21) [[TMP4]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [[COMPLEX_TYPE]], ptr addrspace(32) [[TMP3]], i32 0, i32 1 +; CHECK-NEXT: store half [[VAL_FCA_1_EXTRACT]], ptr addrspace(32) [[TMP4]], align 4 ; CHECK-NEXT: [[VAL_FCA_2_EXTRACT:%.*]] = extractvalue [[COMPLEX_TYPE]] [[VAL]], 2 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr [[COMPLEX_TYPE]], ptr addrspace(21) [[TMP3]], i32 0, i32 2 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr [[COMPLEX_TYPE]], ptr addrspace(32) [[TMP3]], i32 0, i32 2 ; CHECK-NEXT: store ptr addrspace(1) [[VAL_FCA_2_EXTRACT]], ptr [[VAL_FCA_2_ALLOCA]], align 8 ; CHECK-NEXT: [[VAL_FCA_2_ALLOCA_LOAD:%.*]] = load <{ i32, i32 }>, ptr [[VAL_FCA_2_ALLOCA]], align 1 ; CHECK-NEXT: [[VAL_FCA_2_EXTRACT_0:%.*]] = extractvalue <{ i32, i32 }> [[VAL_FCA_2_ALLOCA_LOAD]], 0 -; CHECK-NEXT: store i32 [[VAL_FCA_2_EXTRACT_0]], ptr addrspace(21) [[TMP5]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr <{ i32, i32 }>, ptr addrspace(21) [[TMP5]], i32 0, i32 1 +; CHECK-NEXT: store i32 [[VAL_FCA_2_EXTRACT_0]], ptr addrspace(32) [[TMP5]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr <{ i32, i32 }>, ptr addrspace(32) [[TMP5]], i32 0, i32 1 ; CHECK-NEXT: [[VAL_FCA_2_EXTRACT_1:%.*]] = extractvalue <{ i32, i32 }> [[VAL_FCA_2_ALLOCA_LOAD]], 1 -; CHECK-NEXT: store i32 [[VAL_FCA_2_EXTRACT_1]], ptr addrspace(21) [[TMP6]], align 4 +; CHECK-NEXT: store i32 [[VAL_FCA_2_EXTRACT_1]], ptr addrspace(32) [[TMP6]], align 4 ; CHECK-NEXT: ret [[COMPLEX_TYPE]] [[VAL]] ; %addr = getelementptr [20 x i32], [20 x i32]* @GLOBAL, i32 0, i32 13 @@ -804,9 +804,9 @@ define %complex_type @store_struct_dyn(%complex_type %val, i32 %i) { ; CHECK-SAME: [[COMPLEX_TYPE:%.*]] [[VAL:%.*]], i32 [[I:%.*]]) { ; CHECK-NEXT: [[VAL_FCA_0_ALLOCA:%.*]] = alloca <{ 
i32, i32 }>, align 8 ; CHECK-NEXT: [[VAL_FCA_2_ALLOCA:%.*]] = alloca <{ i32, i32 }>, align 8 -; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(21) @registerbuffer.getpointer.a15i32(ptr addrspace(20) @GLOBAL) -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP1]], i32 -15 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [20 x i32], ptr addrspace(21) [[TMP2]], i32 0, i32 [[I]] +; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @registerbuffer.getpointer.a15i32(ptr addrspace(20) @GLOBAL) +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr addrspace(32) [[TMP1]], i32 -15 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [20 x i32], ptr addrspace(32) [[TMP2]], i32 0, i32 [[I]] ; CHECK-NEXT: [[ADDR:%.*]] = getelementptr [20 x i32], ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i32 0, i32 [[I]] ; CHECK-NEXT: [[VAL_FCA_0_EXTRACT:%.*]] = extractvalue [[COMPLEX_TYPE]] [[VAL]], 0 ; CHECK-NEXT: store ptr [[VAL_FCA_0_EXTRACT]], ptr [[VAL_FCA_0_ALLOCA]], align 8 @@ -821,10 +821,10 @@ define %complex_type @store_struct_dyn(%complex_type %val, i32 %i) { ; CHECK-NEXT: store i32 [[VAL_FCA_0_EXTRACT_0]], ptr addrspace(20) [[TMP4]], align 4 ; CHECK-NEXT: br label [[TMP10:%.*]] ; CHECK: 9: -; CHECK-NEXT: store i32 [[VAL_FCA_0_EXTRACT_0]], ptr addrspace(21) [[TMP3]], align 4 +; CHECK-NEXT: store i32 [[VAL_FCA_0_EXTRACT_0]], ptr addrspace(32) [[TMP3]], align 4 ; CHECK-NEXT: br label [[TMP10]] ; CHECK: 10: -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr <{ i32, i32 }>, ptr addrspace(21) [[TMP3]], i32 0, i32 1 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr <{ i32, i32 }>, ptr addrspace(32) [[TMP3]], i32 0, i32 1 ; CHECK-NEXT: [[VAL_FCA_0_GEP_1:%.*]] = getelementptr inbounds <{ i32, i32 }>, ptr [[ADDR]], i32 0, i32 1 ; CHECK-NEXT: [[VAL_FCA_0_EXTRACT_1:%.*]] = extractvalue <{ i32, i32 }> [[VAL_FCA_0_ALLOCA_LOAD]], 1 ; CHECK-NEXT: [[TMP12:%.*]] = addrspacecast ptr [[VAL_FCA_0_GEP_1]] to ptr addrspace(20) @@ -836,11 +836,11 @@ define %complex_type 
@store_struct_dyn(%complex_type %val, i32 %i) { ; CHECK-NEXT: store i32 [[VAL_FCA_0_EXTRACT_1]], ptr addrspace(20) [[TMP12]], align 4 ; CHECK-NEXT: br label [[TMP18:%.*]] ; CHECK: 17: -; CHECK-NEXT: store i32 [[VAL_FCA_0_EXTRACT_1]], ptr addrspace(21) [[TMP11]], align 4 +; CHECK-NEXT: store i32 [[VAL_FCA_0_EXTRACT_1]], ptr addrspace(32) [[TMP11]], align 4 ; CHECK-NEXT: br label [[TMP18]] ; CHECK: 18: ; CHECK-NEXT: [[VAL_FCA_1_EXTRACT:%.*]] = extractvalue [[COMPLEX_TYPE]] [[VAL]], 1 -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr [[COMPLEX_TYPE]], ptr addrspace(21) [[TMP3]], i32 0, i32 1 +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr [[COMPLEX_TYPE]], ptr addrspace(32) [[TMP3]], i32 0, i32 1 ; CHECK-NEXT: [[VAL_FCA_1_GEP:%.*]] = getelementptr inbounds [[COMPLEX_TYPE]], ptr [[ADDR]], i32 0, i32 1 ; CHECK-NEXT: [[TMP20:%.*]] = addrspacecast ptr [[VAL_FCA_1_GEP]] to ptr addrspace(20) ; CHECK-NEXT: [[TMP21:%.*]] = ptrtoint ptr addrspace(20) [[TMP20]] to i32 @@ -851,11 +851,11 @@ define %complex_type @store_struct_dyn(%complex_type %val, i32 %i) { ; CHECK-NEXT: store half [[VAL_FCA_1_EXTRACT]], ptr addrspace(20) [[TMP20]], align 4 ; CHECK-NEXT: br label [[TMP26:%.*]] ; CHECK: 25: -; CHECK-NEXT: store half [[VAL_FCA_1_EXTRACT]], ptr addrspace(21) [[TMP19]], align 4 +; CHECK-NEXT: store half [[VAL_FCA_1_EXTRACT]], ptr addrspace(32) [[TMP19]], align 4 ; CHECK-NEXT: br label [[TMP26]] ; CHECK: 26: ; CHECK-NEXT: [[VAL_FCA_2_EXTRACT:%.*]] = extractvalue [[COMPLEX_TYPE]] [[VAL]], 2 -; CHECK-NEXT: [[TMP27:%.*]] = getelementptr [[COMPLEX_TYPE]], ptr addrspace(21) [[TMP3]], i32 0, i32 2 +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr [[COMPLEX_TYPE]], ptr addrspace(32) [[TMP3]], i32 0, i32 2 ; CHECK-NEXT: [[VAL_FCA_2_GEP:%.*]] = getelementptr inbounds [[COMPLEX_TYPE]], ptr [[ADDR]], i32 0, i32 2 ; CHECK-NEXT: store ptr addrspace(1) [[VAL_FCA_2_EXTRACT]], ptr [[VAL_FCA_2_ALLOCA]], align 8 ; CHECK-NEXT: [[VAL_FCA_2_ALLOCA_LOAD:%.*]] = load <{ i32, i32 }>, ptr [[VAL_FCA_2_ALLOCA]], align 1 
@@ -869,10 +869,10 @@ define %complex_type @store_struct_dyn(%complex_type %val, i32 %i) { ; CHECK-NEXT: store i32 [[VAL_FCA_2_EXTRACT_0]], ptr addrspace(20) [[TMP28]], align 4 ; CHECK-NEXT: br label [[TMP34:%.*]] ; CHECK: 33: -; CHECK-NEXT: store i32 [[VAL_FCA_2_EXTRACT_0]], ptr addrspace(21) [[TMP27]], align 4 +; CHECK-NEXT: store i32 [[VAL_FCA_2_EXTRACT_0]], ptr addrspace(32) [[TMP27]], align 4 ; CHECK-NEXT: br label [[TMP34]] ; CHECK: 34: -; CHECK-NEXT: [[TMP35:%.*]] = getelementptr <{ i32, i32 }>, ptr addrspace(21) [[TMP27]], i32 0, i32 1 +; CHECK-NEXT: [[TMP35:%.*]] = getelementptr <{ i32, i32 }>, ptr addrspace(32) [[TMP27]], i32 0, i32 1 ; CHECK-NEXT: [[VAL_FCA_2_GEP_1:%.*]] = getelementptr inbounds <{ i32, i32 }>, ptr [[VAL_FCA_2_GEP]], i32 0, i32 1 ; CHECK-NEXT: [[VAL_FCA_2_EXTRACT_1:%.*]] = extractvalue <{ i32, i32 }> [[VAL_FCA_2_ALLOCA_LOAD]], 1 ; CHECK-NEXT: [[TMP36:%.*]] = addrspacecast ptr [[VAL_FCA_2_GEP_1]] to ptr addrspace(20) @@ -884,7 +884,7 @@ define %complex_type @store_struct_dyn(%complex_type %val, i32 %i) { ; CHECK-NEXT: store i32 [[VAL_FCA_2_EXTRACT_1]], ptr addrspace(20) [[TMP36]], align 4 ; CHECK-NEXT: br label [[TMP42:%.*]] ; CHECK: 41: -; CHECK-NEXT: store i32 [[VAL_FCA_2_EXTRACT_1]], ptr addrspace(21) [[TMP35]], align 4 +; CHECK-NEXT: store i32 [[VAL_FCA_2_EXTRACT_1]], ptr addrspace(32) [[TMP35]], align 4 ; CHECK-NEXT: br label [[TMP42]] ; CHECK: 42: ; CHECK-NEXT: ret [[COMPLEX_TYPE]] [[VAL]] diff --git a/llvmraytracing/test/dx/remat-intrinsic.ll b/llvmraytracing/test/dx/remat-intrinsic.ll new file mode 100644 index 0000000000..1d45c498b4 --- /dev/null +++ b/llvmraytracing/test/dx/remat-intrinsic.ll @@ -0,0 +1,202 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 3 +; RUN: opt --verify-each 
-passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' \ +; RUN: -S %s 2> %t.stderr | FileCheck -check-prefix=POSTPROCESS %s +; RUN: count 0 < %t.stderr + +target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" + +%dx.types.Handle = type { i8* } +%struct.DispatchSystemData = type { i32 } +%struct.TraversalData = type { %struct.SystemData } +%struct.SystemData = type { %struct.DispatchSystemData } +%struct.BuiltInTriangleIntersectionAttributes = type { <2 x float> } +%struct.MyParams = type { i32 } +%dx.types.fouri32 = type { i32, i32, i32, i32 } +%dx.types.ResourceProperties = type { i32, i32 } +%"class.RWTexture2D >" = type { <4 x float> } + +@"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A" = external constant %dx.types.Handle, align 4 + +declare i32 @_cont_GetContinuationStackAddr() + +declare %struct.DispatchSystemData @_cont_SetupRayGen() + +declare %struct.DispatchSystemData @_AmdAwaitTraversal(i64, %struct.TraversalData) + +declare %struct.DispatchSystemData @_AmdAwaitShader(i64, %struct.DispatchSystemData) + +declare !types !14 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData*) + +; Function Attrs: nounwind memory(none) +declare !types !16 <3 x i32> @_cont_DispatchRaysIndex3(%struct.DispatchSystemData* nocapture readnone) #0 + +; Function Attrs: nounwind memory(none) +declare !types !18 void @_AmdRestoreSystemData(%struct.DispatchSystemData*) #0 + +define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) !types !19 { + ret i32 5 +} + +define void 
@_cont_CallShader(%struct.DispatchSystemData* %data, i32 %0) !types !20 { + %dis_data = load %struct.DispatchSystemData, %struct.DispatchSystemData* %data, align 4 + %newdata = call %struct.DispatchSystemData @_AmdAwaitShader(i64 2, %struct.DispatchSystemData %dis_data) + store %struct.DispatchSystemData %newdata, %struct.DispatchSystemData* %data, align 4 + call void @_AmdRestoreSystemData(%struct.DispatchSystemData* %data) + ret void +} + +define void @called(%struct.MyParams* %params) !types !21 { + %i = call i32 @dx.op.dispatchRaysIndex.i32(i32 145, i8 0) + %unpacked = call %dx.types.fouri32 @dx.op.unpack4x8.i32(i32 219, i8 1, i32 %i) + %params_i = getelementptr %struct.MyParams, %struct.MyParams* %params, i32 0, i32 0 + %handle0 = load %dx.types.Handle, %dx.types.Handle* @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 + %handle1 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %handle0) + %handle2 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %handle1, %dx.types.ResourceProperties { i32 16, i32 0 }) + call void @dx.op.callShader.struct.MyParams(i32 159, i32 2, %struct.MyParams* nonnull %params) + %a = extractvalue %dx.types.fouri32 %unpacked, 0 + %b = extractvalue %dx.types.fouri32 %unpacked, 1 + %c = extractvalue %dx.types.fouri32 %unpacked, 2 + %d = extractvalue %dx.types.fouri32 %unpacked, 3 + %packed = call i32 @dx.op.pack4x8.i32(i32 220, i8 0, i32 %a, i32 %b, i32 %c, i32 %d) + call void @dx.op.textureStore.f32(i32 67, %dx.types.Handle %handle2, i32 0, i32 0, i32 undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 1.000000e+00, i8 15) + store i32 %packed, i32* %params_i, align 4 + ret void +} + +; Function Attrs: nounwind +declare !types !23 void @dx.op.callShader.struct.MyParams(i32, i32, %struct.MyParams*) #1 + +; Function Attrs: nounwind memory(none) +declare i32 @dx.op.dispatchRaysIndex.i32(i32, i8) #0 + +; Function Attrs: nounwind memory(none) 
+declare %dx.types.fouri32 @dx.op.unpack4x8.i32(i32, i8, i32) #0 + +; Function Attrs: nounwind memory(none) +declare i32 @dx.op.pack4x8.i32(i32, i8, i32, i32, i32, i32) #0 + +; Function Attrs: nounwind memory(none) +declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) #0 + +; Function Attrs: nounwind memory(none) +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #0 + +; Function Attrs: nounwind +declare void @dx.op.textureStore.f32(i32, %dx.types.Handle, i32, i32, i32, float, float, float, float, i8) #1 + +attributes #0 = { nounwind memory(none) } +attributes #1 = { nounwind } + +!llvm.ident = !{!0} +!dx.version = !{!1} +!dx.valver = !{!1} +!dx.shaderModel = !{!2} +!dx.entryPoints = !{!3, !6} +!continuation.maxPayloadRegisterCount = !{!13} + +!0 = !{!"clang version 3.7.0 (tags/RELEASE_370/final)"} +!1 = !{i32 1, i32 6} +!2 = !{!"lib", i32 6, i32 6} +!3 = !{null, !"", null, !4, !12} +!4 = !{!5, !9, null, null} +!5 = !{!6} +!6 = !{void (%struct.MyParams*)* @called, !"called", null, null, !7} +!7 = !{i32 8, i32 12, i32 6, i32 16, i32 7, i32 8, i32 5, !8} +!8 = !{i32 0} +!9 = !{!10} +!10 = !{i32 0, %"class.RWTexture2D >"* bitcast (%dx.types.Handle* @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A" to %"class.RWTexture2D >"*), !"RenderTarget", i32 0, i32 0, i32 1, i32 2, i1 false, i1 false, i1 false, !11} +!11 = !{i32 0, i32 9} +!12 = !{i32 0, i64 65536} +!13 = !{i32 30} +!14 = !{!"function", %struct.BuiltInTriangleIntersectionAttributes poison, !15} +!15 = !{i32 0, %struct.SystemData poison} +!16 = !{!"function", <3 x i32> poison, !17} +!17 = !{i32 0, %struct.DispatchSystemData poison} +!18 = !{!"function", !"void", !17} +!19 = !{!"function", i32 poison, !17} +!20 = !{!"function", !"void", !17, i32 poison} +!21 = !{!"function", !"void", !22} +!22 = !{i32 0, %struct.MyParams poison} +!23 = !{!"function", !"void", i32 poison, i32 poison, !22} +; POSTPROCESS-LABEL: define i32 
@_cont_GetLocalRootIndex( +; POSTPROCESS-SAME: ptr [[DATA:%.*]]) #[[ATTR1:[0-9]+]] { +; POSTPROCESS-NEXT: ret i32 5 +; +; +; POSTPROCESS-LABEL: define void @called( +; POSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !continuation [[META16:![0-9]+]] !lgc.rt.shaderstage [[META17:![0-9]+]] !continuation.registercount [[META14:![0-9]+]] !continuation.stacksize [[META18:![0-9]+]] !continuation.state [[META18]] { +; POSTPROCESS-NEXT: AllocaSpillBB: +; POSTPROCESS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 +; POSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; POSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; POSTPROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 0 +; POSTPROCESS-NEXT: [[TMP3:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(21) +; POSTPROCESS-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP3]], i32 0 +; POSTPROCESS-NEXT: store i64 [[RETURNADDR]], ptr addrspace(21) [[TMP4]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 +; POSTPROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; POSTPROCESS-NEXT: [[TMP6:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[SYSTEM_DATA_ALLOCA]]) +; POSTPROCESS-NEXT: [[I:%.*]] = extractelement <3 x i32> [[TMP6]], i8 0 +; POSTPROCESS-NEXT: [[UNPACKED:%.*]] = call [[DX_TYPES_FOURI32:%.*]] @dx.op.unpack4x8.i32(i32 219, i8 1, i32 [[I]]) +; POSTPROCESS-NEXT: [[HANDLE0:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 +; POSTPROCESS-NEXT: [[HANDLE1:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 
160, [[DX_TYPES_HANDLE]] [[HANDLE0]]) +; POSTPROCESS-NEXT: [[HANDLE2:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[HANDLE1]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; POSTPROCESS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT]], 0 +; POSTPROCESS-NEXT: store i32 [[TMP5]], ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[TMP8:%.*]] = add i32 [[TMP7]], 8 +; POSTPROCESS-NEXT: store i32 [[TMP8]], ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[TMP9:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[TMP10:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @called.resume.0 to i64)) +; POSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 2, i32 [[TMP9]], i64 [[TMP10]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]]), !continuation.registercount [[META14]], !continuation.returnedRegistercount !14 +; POSTPROCESS-NEXT: unreachable +; +; +; POSTPROCESS-LABEL: define dso_local void @called.resume.0( +; POSTPROCESS-SAME: i32 [[CSPINIT:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !continuation [[META16]] !lgc.rt.shaderstage [[META17]] !continuation.registercount [[META14]] { +; POSTPROCESS-NEXT: entryresume.0: +; POSTPROCESS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 +; POSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; POSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; POSTPROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], -8 +; POSTPROCESS-NEXT: store i32 [[TMP2]], ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 0 +; 
POSTPROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT3:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 +; POSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; POSTPROCESS-NEXT: [[TMP6:%.*]] = inttoptr i32 [[TMP4]] to ptr addrspace(21) +; POSTPROCESS-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP6]], i32 0 +; POSTPROCESS-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(21) [[TMP7]], align 4 +; POSTPROCESS-NEXT: [[HANDLE011:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 +; POSTPROCESS-NEXT: [[HANDLE110:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[HANDLE011]]) +; POSTPROCESS-NEXT: [[HANDLE29:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[HANDLE110]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; POSTPROCESS-NEXT: [[TMP8:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[SYSTEM_DATA_ALLOCA]]) +; POSTPROCESS-NEXT: [[I8:%.*]] = extractelement <3 x i32> [[TMP8]], i8 0 +; POSTPROCESS-NEXT: [[UNPACKED7:%.*]] = call [[DX_TYPES_FOURI32:%.*]] @dx.op.unpack4x8.i32(i32 219, i8 1, i32 [[I8]]) +; POSTPROCESS-NEXT: [[TMP9:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[SYSTEM_DATA_ALLOCA]]) +; POSTPROCESS-NEXT: [[I6:%.*]] = extractelement <3 x i32> [[TMP9]], i8 0 +; POSTPROCESS-NEXT: [[UNPACKED5:%.*]] = call [[DX_TYPES_FOURI32]] @dx.op.unpack4x8.i32(i32 219, i8 1, i32 [[I6]]) +; POSTPROCESS-NEXT: [[TMP10:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[SYSTEM_DATA_ALLOCA]]) +; POSTPROCESS-NEXT: [[I4:%.*]] = extractelement <3 x i32> [[TMP10]], i8 0 +; POSTPROCESS-NEXT: [[UNPACKED3:%.*]] = call [[DX_TYPES_FOURI32]] @dx.op.unpack4x8.i32(i32 219, i8 1, i32 [[I4]]) +; POSTPROCESS-NEXT: [[TMP11:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[SYSTEM_DATA_ALLOCA]]) +; 
POSTPROCESS-NEXT: [[I2:%.*]] = extractelement <3 x i32> [[TMP11]], i8 0 +; POSTPROCESS-NEXT: [[UNPACKED1:%.*]] = call [[DX_TYPES_FOURI32]] @dx.op.unpack4x8.i32(i32 219, i8 1, i32 [[I2]]) +; POSTPROCESS-NEXT: [[A:%.*]] = extractvalue [[DX_TYPES_FOURI32]] [[UNPACKED7]], 0 +; POSTPROCESS-NEXT: [[B:%.*]] = extractvalue [[DX_TYPES_FOURI32]] [[UNPACKED5]], 1 +; POSTPROCESS-NEXT: [[C:%.*]] = extractvalue [[DX_TYPES_FOURI32]] [[UNPACKED3]], 2 +; POSTPROCESS-NEXT: [[D:%.*]] = extractvalue [[DX_TYPES_FOURI32]] [[UNPACKED1]], 3 +; POSTPROCESS-NEXT: [[PACKED:%.*]] = call i32 @dx.op.pack4x8.i32(i32 220, i8 0, i32 [[A]], i32 [[B]], i32 [[C]], i32 [[D]]) +; POSTPROCESS-NEXT: call void @dx.op.textureStore.f32(i32 67, [[DX_TYPES_HANDLE]] [[HANDLE29]], i32 0, i32 0, i32 undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 1.000000e+00, i8 15) +; POSTPROCESS-NEXT: store i32 [[PACKED]], ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT3]], 0 +; POSTPROCESS-NEXT: [[TMP12:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP12]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META14]] +; POSTPROCESS-NEXT: unreachable +; diff --git a/shared/continuations/test/dx/remove-types-metadata.ll b/llvmraytracing/test/dx/remove-types-metadata.ll similarity index 99% rename from shared/continuations/test/dx/remove-types-metadata.ll rename to llvmraytracing/test/dx/remove-types-metadata.ll index 4b9eaa9587..36df137617 100644 --- a/shared/continuations/test/dx/remove-types-metadata.ll +++ b/llvmraytracing/test/dx/remove-types-metadata.ll @@ -2,7 +2,7 @@ ; RUN: opt --verify-each -passes='remove-types-metadata' -S %s 2> %t.stderr | FileCheck -check-prefix=METADATA %s ; RUN: count 0 < %t.stderr -target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" +target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" %dx.types.Handle = type { i8* } %struct.DispatchSystemData = type { <3 x i32> } diff --git a/shared/continuations/test/dx/remove-unused-declarations.ll b/llvmraytracing/test/dx/remove-unused-declarations.ll similarity index 96% rename from shared/continuations/test/dx/remove-unused-declarations.ll rename to llvmraytracing/test/dx/remove-unused-declarations.ll index 0ad97a9fdb..20e12372fc 100644 --- a/shared/continuations/test/dx/remove-unused-declarations.ll +++ b/llvmraytracing/test/dx/remove-unused-declarations.ll @@ -1,9 +1,9 @@ ; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint' -S %s 2> %t0.stderr | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE-DECL 
%s ; RUN: count 0 < %t0.stderr -; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,save-continuation-state,lint,dxil-cont-post-process,lint' -S %s 2> %t1.stderr | FileCheck -check-prefix=DXILCONTPOSTPROCESS-DECL %s +; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint' -S %s 2> %t1.stderr | FileCheck -check-prefix=DXILCONTPOSTPROCESS-DECL %s ; RUN: count 0 < %t1.stderr -target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" +target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" %struct.HitData = type { float, i32 } %struct.DispatchSystemData = type { <3 x i32> } diff --git a/shared/continuations/test/dx/traceray.ll b/llvmraytracing/test/dx/traceray.ll similarity index 93% rename from shared/continuations/test/dx/traceray.ll rename to llvmraytracing/test/dx/traceray.ll index b86cca5a9d..3f1dbad7d0 100644 --- a/shared/continuations/test/dx/traceray.ll +++ b/llvmraytracing/test/dx/traceray.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 3 ; RUN: opt --verify-each 
-passes='dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,remove-types-metadata' -S %s 2> %t0.stderr | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE %s ; RUN: count 0 < %t0.stderr -; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,save-continuation-state,lint,dxil-cont-post-process,lint,remove-types-metadata' -S %s 2> %t1.stderr | FileCheck -check-prefix=DXILCONTPOSTPROCESS %s +; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' -S %s 2> %t1.stderr | FileCheck -check-prefix=DXILCONTPOSTPROCESS %s ; RUN: count 0 < %t1.stderr -target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" +target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" %dx.types.Handle = type { i8* } %struct.DispatchSystemData = type { <3 x i32> } @@ -408,7 +408,7 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; ; ; LOWERRAYTRACINGPIPELINE-LABEL: define void @MyRayGen( -; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3:[0-9]+]] !lgc.rt.shaderstage [[META23:![0-9]+]] !continuation.entry [[META14:![0-9]+]] !continuation.registercount [[META23]] !continuation [[META35:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: 
[[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3:[0-9]+]] !lgc.rt.shaderstage [[META23:![0-9]+]] !continuation.entry [[META14:![0-9]+]] !continuation.registercount [[META23]] !continuation [[META36:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) @@ -418,7 +418,7 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = bitcast ptr [[TMP4]] to ptr ; LOWERRAYTRACINGPIPELINE-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[TMP5]]) #[[ATTR1:[0-9]+]] ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP4]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: store <4 x float> zeroinitializer, ptr [[TMP6]], align 4, !tbaa [[TBAA36:![0-9]+]] +; LOWERRAYTRACINGPIPELINE-NEXT: store <4 x float> zeroinitializer, ptr [[TMP6]], align 4, !tbaa [[TBAA37:![0-9]+]] ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP2]]) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP7]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP8]]) @@ -442,7 +442,7 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[TMP14]], i64 2 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP20]], ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = call ptr inttoptr (i64 4 to ptr)(i64 -1, [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]]), !continuation.registercount [[META39:![0-9]+]], !continuation.returnedRegistercount !39, !continuation.wait.await [[META14]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = call ptr inttoptr (i64 4 to ptr)(i64 -1, [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]]), !continuation.registercount [[META34:![0-9]+]], !continuation.wait.await [[META14]], !continuation.returnedRegistercount !34 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] @await.struct.DispatchSystemData(ptr [[TMP21]]) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_RAYPAYLOAD]] poison, ptr [[TMP4]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP4]], i32 0, i32 0 @@ -464,7 +464,7 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; LOWERRAYTRACINGPIPELINE-NEXT: br label [[DOTSPLIT:%.*]] ; LOWERRAYTRACINGPIPELINE: .split: -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP34:%.*]] = load <4 x float>, ptr [[TMP6]], align 4, !tbaa [[TBAA36]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP34:%.*]] = load <4 x float>, ptr [[TMP6]], align 4, !tbaa [[TBAA37]] ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP35:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.index() ; LOWERRAYTRACINGPIPELINE-NEXT: [[EXTRACT:%.*]] = extractelement <3 x i32> [[TMP35]], i8 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP36:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.index() @@ -481,7 +481,7 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; ; ; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.DispatchSystemData @MyClosestHitShader( -; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) 
#[[ATTR3]] !lgc.rt.shaderstage [[META40:![0-9]+]] !continuation.registercount [[META39]] !continuation [[META41:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META40:![0-9]+]] !continuation.registercount [[META34]] !continuation [[META41:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 8 @@ -546,11 +546,11 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP43]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP45:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP44]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP45]], !continuation.registercount [[META39]] +; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP45]], !continuation.registercount [[META34]] ; ; ; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.TraversalData @MyAnyHitShader( -; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[TMP1:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META42:![0-9]+]] !continuation.registercount [[META39]] !continuation [[META43:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[TMP1:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META42:![0-9]+]] !continuation.registercount [[META34]] !continuation 
[[META43:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_HITDATA]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = alloca [[STRUCT_HITDATA]], align 8 @@ -649,7 +649,7 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-NEXT: [[ADDR_I1:%.*]] = getelementptr [[STRUCT_SYSTEMDATA]], ptr [[TMP59]], i32 0, i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP58]], ptr [[ADDR_I1]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP60:%.*]] = load [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_TRAVERSALDATA]] [[TMP60]], !continuation.registercount [[META39]] +; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_TRAVERSALDATA]] [[TMP60]], !continuation.registercount [[META34]] ; LOWERRAYTRACINGPIPELINE: 61: ; LOWERRAYTRACINGPIPELINE-NEXT: store <4 x float> [[TMP30]], ptr [[TMP29]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @_cont_AcceptHit(ptr [[SYSTEM_DATA_ALLOCA]]) @@ -682,7 +682,7 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-NEXT: [[ADDR_I2:%.*]] = getelementptr [[STRUCT_SYSTEMDATA]], ptr [[TMP80]], i32 0, i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP79]], ptr [[ADDR_I2]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP81:%.*]] = load [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_TRAVERSALDATA]] [[TMP81]], !continuation.registercount [[META39]] +; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_TRAVERSALDATA]] [[TMP81]], !continuation.registercount [[META34]] ; ; ; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.TraversalData @MyIntersectionShader( @@ -821,7 +821,7 @@ attributes #6 = { nocallback nofree nosync nounwind 
willreturn memory(argmem: re ; ; ; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.DispatchSystemData @MyMissShader( -; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META47:![0-9]+]] !continuation.registercount [[META39]] !continuation [[META48:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META47:![0-9]+]] !continuation.registercount [[META34]] !continuation [[META48:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_SYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 @@ -863,7 +863,7 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP26]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP27]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP28]], !continuation.registercount [[META39]] +; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP28]], !continuation.registercount [[META34]] ; ; ; DXILCONTPOSTPROCESS-LABEL: define i1 @_cont_IsEndSearch( @@ -906,12 +906,12 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; ; ; DXILCONTPOSTPROCESS-LABEL: define void @MyRayGen( -; DXILCONTPOSTPROCESS-SAME: ) #[[ATTR3:[0-9]+]] !lgc.rt.shaderstage [[META22:![0-9]+]] !continuation.entry [[META13:![0-9]+]] !continuation.registercount [[META22]] !continuation [[META34:![0-9]+]] !continuation.state 
[[META22]] { +; DXILCONTPOSTPROCESS-SAME: ) #[[ATTR3:[0-9]+]] !lgc.rt.shaderstage [[META22:![0-9]+]] !continuation.entry [[META13:![0-9]+]] !continuation.registercount [[META22]] !continuation [[META35:![0-9]+]] !continuation.state [[META22]] { ; DXILCONTPOSTPROCESS-NEXT: AllocaSpillBB: ; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[SYSTEM_DATA:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA:%.*]] @_cont_SetupRayGen() ; DXILCONTPOSTPROCESS-NEXT: [[TMP0:%.*]] = call i32 @_cont_GetContinuationStackAddr() ; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP0]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[SYSTEM_DATA:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA:%.*]] @_cont_SetupRayGen() ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[SYSTEM_DATA]], 0 ; DXILCONTPOSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; DXILCONTPOSTPROCESS-NEXT: [[TMP1:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 @@ -937,47 +937,50 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 ; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP10]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: call void (i64, i64, ...) @continuation.waitContinue(i64 4, i64 -1, i32 [[TMP11]], [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]]), !continuation.registercount [[META35:![0-9]+]], !continuation.returnedRegistercount !35 +; DXILCONTPOSTPROCESS-NEXT: call void (i64, i64, ...) 
@continuation.waitContinue(i64 4, i64 -1, i32 [[TMP11]], [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]]), !continuation.registercount [[META33:![0-9]+]], !continuation.returnedRegistercount !33 ; DXILCONTPOSTPROCESS-NEXT: unreachable ; ; ; DXILCONTPOSTPROCESS-LABEL: define dso_local void @MyRayGen.resume.0( -; DXILCONTPOSTPROCESS-SAME: i32 [[TMP0:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP1:%.*]]) !lgc.rt.shaderstage [[META22]] !continuation.registercount [[META35]] !continuation [[META34]] { +; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META22]] !continuation.registercount [[META33]] !continuation [[META35]] { ; DXILCONTPOSTPROCESS-NEXT: entryresume.0: ; DXILCONTPOSTPROCESS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 -; DXILCONTPOSTPROCESS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP1]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float -; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float -; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP5]], i32 1 -; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr 
addrspace(20)), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = bitcast i32 [[TMP6]] to float -; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP7]], i32 2 -; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP9:%.*]] = bitcast i32 [[TMP8]] to float -; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP9]], i32 3 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT6:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP1]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float +; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to float +; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP4]], i32 1 +; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP5]] to float +; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP6]], i32 2 +; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP7]] to float +; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP8]], i32 3 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT6:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 ; DXILCONTPOSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) -; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP9:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[SYSTEM_DATA_ALLOCA]]) +; DXILCONTPOSTPROCESS-NEXT: [[EXTRACT:%.*]] = extractelement <3 x i32> [[TMP10]], i8 0 ; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[SYSTEM_DATA_ALLOCA]]) -; DXILCONTPOSTPROCESS-NEXT: [[EXTRACT:%.*]] = extractelement <3 x i32> [[TMP11]], i8 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[SYSTEM_DATA_ALLOCA]]) -; DXILCONTPOSTPROCESS-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x i32> [[TMP12]], i8 1 -; DXILCONTPOSTPROCESS-NEXT: [[TMP13:%.*]] = 
call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP10]]) -; DXILCONTPOSTPROCESS-NEXT: [[TMP14:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP13]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 4098, i32 1033 }) -; DXILCONTPOSTPROCESS-NEXT: [[TMP15:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP16:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 1 -; DXILCONTPOSTPROCESS-NEXT: [[TMP17:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 2 -; DXILCONTPOSTPROCESS-NEXT: [[TMP18:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 3 -; DXILCONTPOSTPROCESS-NEXT: call void @dx.op.textureStore.f32(i32 67, [[DX_TYPES_HANDLE]] [[TMP14]], i32 [[EXTRACT]], i32 [[EXTRACT1]], i32 undef, float [[TMP15]], float [[TMP16]], float [[TMP17]], float [[TMP18]], i8 15) -; DXILCONTPOSTPROCESS-NEXT: call void @continuation.complete() +; DXILCONTPOSTPROCESS-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x i32> [[TMP11]], i8 1 +; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP9]]) +; DXILCONTPOSTPROCESS-NEXT: [[TMP13:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP12]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 4098, i32 1033 }) +; DXILCONTPOSTPROCESS-NEXT: [[TMP14:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP15:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 1 +; DXILCONTPOSTPROCESS-NEXT: [[TMP16:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 2 +; DXILCONTPOSTPROCESS-NEXT: [[TMP17:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 3 +; DXILCONTPOSTPROCESS-NEXT: call void @dx.op.textureStore.f32(i32 67, [[DX_TYPES_HANDLE]] [[TMP13]], i32 [[EXTRACT]], i32 
[[EXTRACT1]], i32 undef, float [[TMP14]], float [[TMP15]], float [[TMP16]], float [[TMP17]], i8 15) +; DXILCONTPOSTPROCESS-NEXT: ret void +; DXILCONTPOSTPROCESS: entryresume.0.split: ; DXILCONTPOSTPROCESS-NEXT: unreachable ; ; ; DXILCONTPOSTPROCESS-LABEL: define void @MyClosestHitShader( -; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META36:![0-9]+]] !continuation.registercount [[META35]] !continuation [[META37:![0-9]+]] !continuation.state [[META22]] { +; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META36:![0-9]+]] !continuation.registercount [[META33]] !continuation [[META37:![0-9]+]] !continuation.state [[META22]] { ; DXILCONTPOSTPROCESS-NEXT: AllocaSpillBB: ; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; DXILCONTPOSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 @@ -1028,12 +1031,12 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP24]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison, <3 x i32> [[DOTFCA_0_0_EXTRACT]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[TMP25:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP25]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META35]] +; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR]], i32 [[TMP25]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META33]] ; DXILCONTPOSTPROCESS-NEXT: unreachable ; ; ; DXILCONTPOSTPROCESS-LABEL: define void @MyAnyHitShader( -; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[TMP1:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META38:![0-9]+]] !continuation.registercount [[META35]] !continuation [[META39:![0-9]+]] !continuation.state [[META22]] { +; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[TMP1:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META38:![0-9]+]] !continuation.registercount [[META33]] !continuation [[META39:![0-9]+]] !continuation.state [[META22]] { ; DXILCONTPOSTPROCESS-NEXT: AllocaSpillBB: ; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 ; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_HITDATA]], align 8 @@ -1189,7 +1192,7 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_LOAD:%.*]] = load i64, ptr [[DOTFCA_5_GEP33]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT]], i64 [[DOTFCA_5_LOAD]], 5 ; DXILCONTPOSTPROCESS-NEXT: [[TMP35:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP35]], [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT]]), !continuation.registercount [[META35]] +; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR]], i32 [[TMP35]], [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT]]), !continuation.registercount [[META33]] ; DXILCONTPOSTPROCESS-NEXT: unreachable ; DXILCONTPOSTPROCESS: 36: ; DXILCONTPOSTPROCESS-NEXT: call void @_cont_AcceptHit(ptr [[SYSTEM_DATA_ALLOCA]]) @@ -1244,20 +1247,20 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_LOAD58:%.*]] = load i64, ptr [[DOTFCA_5_GEP57]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_INSERT59:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT56]], i64 [[DOTFCA_5_LOAD58]], 5 ; DXILCONTPOSTPROCESS-NEXT: [[TMP46:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP46]], [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT59]]), !continuation.registercount [[META35]] +; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP46]], [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT59]]), !continuation.registercount [[META33]] ; DXILCONTPOSTPROCESS-NEXT: unreachable ; ; ; DXILCONTPOSTPROCESS-LABEL: define void @MyIntersectionShader( -; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META40:![0-9]+]] !continuation.registercount [[META32:![0-9]+]] !continuation [[META41:![0-9]+]] !continuation.state [[META42:![0-9]+]] !continuation.stacksize [[META42]] { +; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META40:![0-9]+]] !continuation.registercount [[META32:![0-9]+]] !continuation [[META41:![0-9]+]] !continuation.stacksize [[META42:![0-9]+]] !continuation.state [[META42]] { ; DXILCONTPOSTPROCESS-NEXT: AllocaSpillBB: ; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; DXILCONTPOSTPROCESS-NEXT: store 
i32 [[CSPINIT]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = inttoptr i32 [[TMP1]] to ptr addrspace(21) -; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP2]], i64 0 -; DXILCONTPOSTPROCESS-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[MYINTERSECTIONSHADER_FRAME:%.*]], ptr addrspace(21) [[TMP3]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: store i64 [[RETURNADDR]], ptr addrspace(21) [[RETURNADDR_SPILL_ADDR]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(21) +; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP3]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: store i64 [[RETURNADDR]], ptr addrspace(21) [[TMP4]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 0, 0, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 0, 1, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 1, 0 @@ -1283,27 +1286,27 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_4_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_3_INSERT]], float [[DOTFCA_4_EXTRACT]], 4 ; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_5_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_4_INSERT]], i64 [[DOTFCA_5_EXTRACT]], 5 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> undef, 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], 8 -; DXILCONTPOSTPROCESS-NEXT: store i32 
[[TMP5]], ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @MyIntersectionShader.resume.0 to i64)) -; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 3, i32 [[TMP6]], i64 [[TMP7]], [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_5_INSERT]], float [[RES_I_FCA_1_INSERT_FCA_0_EXTRACT]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META32]], !continuation.returnedRegistercount !32 +; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], 8 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP6]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @MyIntersectionShader.resume.0 to i64)) +; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 3, i32 [[TMP7]], i64 [[TMP8]], [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_5_INSERT]], float [[RES_I_FCA_1_INSERT_FCA_0_EXTRACT]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META32]], !continuation.returnedRegistercount !32 ; DXILCONTPOSTPROCESS-NEXT: unreachable ; DXILCONTPOSTPROCESS: accepthit.i: ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-NEXT: [[TMP9:%.*]] = bitcast i32 [[TMP8]] to float -; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_065_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP9]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP9:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP9]] to float +; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_065_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP10]], i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 1 -; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP10]] to float -; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_065_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_065_0_VEC_INSERT]], float [[TMP11]], i32 1 +; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP11]] to float +; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_065_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_065_0_VEC_INSERT]], float [[TMP12]], i32 1 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT64:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_065_4_VEC_INSERT]], 0 ; DXILCONTPOSTPROCESS-NEXT: 
[[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT64]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[ISEND_I:%.*]] = call i1 @opaqueIsEnd() -; DXILCONTPOSTPROCESS-NEXT: br i1 [[ISEND_I]], label [[TMP12:%.*]], label [[TMP14:%.*]] -; DXILCONTPOSTPROCESS: 12: +; DXILCONTPOSTPROCESS-NEXT: br i1 [[ISEND_I]], label [[TMP13:%.*]], label [[TMP15:%.*]] +; DXILCONTPOSTPROCESS: 13: ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_0_INSERT28:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_EXTRACT]], 0, 0, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_1_0_INSERT31:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT28]], <2 x float> [[DOTFCA_0_EXTRACT]], 0, 1, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_0_INSERT34:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT31]], float [[DOTFCA_1_0_EXTRACT]], 1, 0 @@ -1312,10 +1315,10 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_INSERT43:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT40]], <3 x float> [[DOTFCA_3_EXTRACT]], 3 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_INSERT46:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT43]], float [[DOTFCA_4_EXTRACT]], 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_INSERT49:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT46]], i64 [[DOTFCA_5_EXTRACT]], 5 -; DXILCONTPOSTPROCESS-NEXT: [[TMP13:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP13]], [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT49]]), !continuation.registercount [[META32]] +; DXILCONTPOSTPROCESS-NEXT: [[TMP14:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR]], i32 [[TMP14]], [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT49]]), !continuation.registercount [[META32]] ; DXILCONTPOSTPROCESS-NEXT: unreachable -; DXILCONTPOSTPROCESS: 14: +; DXILCONTPOSTPROCESS: 15: ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_EXTRACT]], 0, 0, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT]], <2 x float> [[DOTFCA_0_EXTRACT]], 0, 1, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], float [[DOTFCA_1_0_EXTRACT]], 1, 0 @@ -1324,36 +1327,36 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT]], <3 x float> [[DOTFCA_3_EXTRACT]], 3 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT]], float [[DOTFCA_4_EXTRACT]], 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT]], i64 [[DOTFCA_5_EXTRACT]], 5 -; DXILCONTPOSTPROCESS-NEXT: [[TMP15:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP15]], [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT]]), !continuation.registercount [[META32]] +; DXILCONTPOSTPROCESS-NEXT: [[TMP16:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR]], i32 [[TMP16]], [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT]]), !continuation.registercount [[META32]] ; DXILCONTPOSTPROCESS-NEXT: unreachable ; ; ; DXILCONTPOSTPROCESS-LABEL: define dso_local void @MyIntersectionShader.resume.0( -; DXILCONTPOSTPROCESS-SAME: i32 [[TMP0:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP1:%.*]]) !lgc.rt.shaderstage [[META40]] !continuation.registercount [[META32]] !continuation [[META41]] { +; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META40]] !continuation.registercount [[META32]] !continuation [[META41]] { ; DXILCONTPOSTPROCESS-NEXT: entryresume.0: ; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP0]], ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], -8 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP3]], ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP4]] to ptr addrspace(21) -; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP5]], i64 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_0_EXTRACT10:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP1]], 0, 0, 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_1_0_EXTRACT12:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP1]], 0, 1, 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_0_EXTRACT14:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP1]], 1, 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_1_EXTRACT16:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP1]], 1, 1 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_EXTRACT18:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP1]], 2 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_EXTRACT20:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP1]], 3 -; DXILCONTPOSTPROCESS-NEXT: 
[[DOTFCA_4_EXTRACT22:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP1]], 4 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_EXTRACT24:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP1]], 5 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], -8 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP2]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_0_EXTRACT10:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 0, 0, 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_1_0_EXTRACT12:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 0, 1, 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_0_EXTRACT14:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 1, 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_1_EXTRACT16:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 1, 1 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_EXTRACT18:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 2 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_EXTRACT20:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 3 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_EXTRACT22:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_EXTRACT24:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 5 ; DXILCONTPOSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; DXILCONTPOSTPROCESS-NEXT: [[ISEND_I:%.*]] = call i1 @opaqueIsEnd() -; DXILCONTPOSTPROCESS-NEXT: br i1 [[ISEND_I]], label [[TMP7:%.*]], label [[TMP9:%.*]] -; DXILCONTPOSTPROCESS: 7: -; DXILCONTPOSTPROCESS-NEXT: [[RETURNADDR_RELOAD_ADDR1:%.*]] = getelementptr inbounds [[MYINTERSECTIONSHADER_FRAME:%.*]], ptr addrspace(21) [[TMP6]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[RETURNADDR_RELOAD2:%.*]] = load i64, ptr 
addrspace(21) [[RETURNADDR_RELOAD_ADDR1]], align 4 +; DXILCONTPOSTPROCESS-NEXT: br i1 [[ISEND_I]], label [[TMP5:%.*]], label [[TMP9:%.*]] +; DXILCONTPOSTPROCESS: 5: +; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = inttoptr i32 [[TMP4]] to ptr addrspace(21) +; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP6]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[RETURNADDR_RELOAD2:%.*]] = load i64, ptr addrspace(21) [[TMP7]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_0_INSERT28:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_EXTRACT10]], 0, 0, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_1_0_INSERT31:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT28]], <2 x float> [[DOTFCA_0_1_0_EXTRACT12]], 0, 1, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_0_INSERT34:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT31]], float [[DOTFCA_1_0_EXTRACT14]], 1, 0 @@ -1366,8 +1369,9 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR_RELOAD2]], i32 [[TMP8]], [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT49]]), !continuation.registercount [[META32]] ; DXILCONTPOSTPROCESS-NEXT: unreachable ; DXILCONTPOSTPROCESS: 9: -; DXILCONTPOSTPROCESS-NEXT: [[RETURNADDR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[MYINTERSECTIONSHADER_FRAME]], ptr addrspace(21) [[TMP6]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(21) [[RETURNADDR_RELOAD_ADDR]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP4]] to ptr addrspace(21) +; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP10]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(21) [[TMP11]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_EXTRACT10]], 0, 0, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT]], <2 x float> [[DOTFCA_0_1_0_EXTRACT12]], 0, 1, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], float [[DOTFCA_1_0_EXTRACT14]], 1, 0 @@ -1376,21 +1380,21 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT]], <3 x float> [[DOTFCA_3_EXTRACT20]], 3 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT]], float [[DOTFCA_4_EXTRACT22]], 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT]], i64 [[DOTFCA_5_EXTRACT24]], 5 -; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP10]], [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT]]), !continuation.registercount [[META32]] +; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP12]], [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT]]), !continuation.registercount [[META32]] ; DXILCONTPOSTPROCESS-NEXT: unreachable ; ; ; DXILCONTPOSTPROCESS-LABEL: define void @MyIntersectionShaderLargeAttrs( -; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META40]] !continuation.registercount [[META32]] !continuation [[META43:![0-9]+]] !continuation.state [[META42]] !continuation.stacksize [[META42]] { +; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META40]] !continuation.registercount [[META32]] !continuation [[META43:![0-9]+]] !continuation.stacksize [[META42]] !continuation.state [[META42]] { ; DXILCONTPOSTPROCESS-NEXT: AllocaSpillBB: ; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; DXILCONTPOSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = inttoptr i32 [[TMP1]] to ptr addrspace(21) -; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP2]], i64 0 -; DXILCONTPOSTPROCESS-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[MYINTERSECTIONSHADERLARGEATTRS_FRAME:%.*]], ptr addrspace(21) [[TMP3]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: store i64 [[RETURNADDR]], ptr addrspace(21) [[RETURNADDR_SPILL_ADDR]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = inttoptr i32 [[TMP2]] to ptr 
addrspace(21) +; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP3]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: store i64 [[RETURNADDR]], ptr addrspace(21) [[TMP4]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 0, 0, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 0, 1, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 1, 0 @@ -1422,18 +1426,18 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_LARGEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_3_INSERT]], i32 104, 0, 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_LARGEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_4_INSERT]], i32 105, 0, 5 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_6_INSERT:%.*]] = insertvalue [[STRUCT_LARGEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_5_INSERT]], i32 106, 0, 6 -; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], 8 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP5]], ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @MyIntersectionShaderLargeAttrs.resume.0 to i64)) -; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 3, i32 [[TMP6]], i64 [[TMP7]], [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_5_INSERT]], float [[RES_I_FCA_1_INSERT_FCA_0_EXTRACT]], i32 0, [[STRUCT_LARGEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_6_INSERT]]), !continuation.registercount [[META32]], !continuation.returnedRegistercount !32 +; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], 8 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP6]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @MyIntersectionShaderLargeAttrs.resume.0 to i64)) +; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 3, i32 [[TMP7]], i64 [[TMP8]], [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_5_INSERT]], float [[RES_I_FCA_1_INSERT_FCA_0_EXTRACT]], i32 0, [[STRUCT_LARGEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_6_INSERT]]), !continuation.registercount [[META32]], !continuation.returnedRegistercount !32 ; DXILCONTPOSTPROCESS-NEXT: unreachable ; DXILCONTPOSTPROCESS: accepthit.i: -; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = bitcast i32 100 to float -; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_070_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP8]], i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP9:%.*]] = bitcast i32 101 to float -; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_070_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_070_0_VEC_INSERT]], float [[TMP9]], i32 1 +; DXILCONTPOSTPROCESS-NEXT: [[TMP9:%.*]] = bitcast i32 100 to float +; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_070_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP9]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = bitcast i32 101 to float +; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_070_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_070_0_VEC_INSERT]], float [[TMP10]], i32 1 ; DXILCONTPOSTPROCESS-NEXT: 
store i32 102, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([30 x i32], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 1) to ptr addrspace(20)), align 4 ; DXILCONTPOSTPROCESS-NEXT: store i32 103, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([30 x i32], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i64 2) to ptr addrspace(20)), align 4 ; DXILCONTPOSTPROCESS-NEXT: store i32 104, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([30 x i32], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i64 3) to ptr addrspace(20)), align 4 @@ -1442,8 +1446,8 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> [[DOTSROA_070_4_VEC_INSERT]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[ISEND_I:%.*]] = call i1 @opaqueIsEnd() -; DXILCONTPOSTPROCESS-NEXT: br i1 [[ISEND_I]], label [[TMP10:%.*]], label [[TMP12:%.*]] -; DXILCONTPOSTPROCESS: 10: +; DXILCONTPOSTPROCESS-NEXT: br i1 [[ISEND_I]], label [[TMP11:%.*]], label [[TMP13:%.*]] +; DXILCONTPOSTPROCESS: 11: ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_0_INSERT28:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_EXTRACT]], 0, 0, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_1_0_INSERT31:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT28]], <2 x float> [[DOTFCA_0_EXTRACT]], 0, 1, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_0_INSERT34:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT31]], float [[DOTFCA_1_0_EXTRACT]], 1, 0 @@ -1452,10 +1456,10 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_INSERT43:%.*]] = insertvalue 
[[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT40]], <3 x float> [[DOTFCA_3_EXTRACT]], 3 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_INSERT46:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT43]], float [[DOTFCA_4_EXTRACT]], 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_INSERT49:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT46]], i64 [[DOTFCA_5_EXTRACT]], 5 -; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP11]], [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT49]]), !continuation.registercount [[META32]] +; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP12]], [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT49]]), !continuation.registercount [[META32]] ; DXILCONTPOSTPROCESS-NEXT: unreachable -; DXILCONTPOSTPROCESS: 12: +; DXILCONTPOSTPROCESS: 13: ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_EXTRACT]], 0, 0, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT]], <2 x float> [[DOTFCA_0_EXTRACT]], 0, 1, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], float [[DOTFCA_1_0_EXTRACT]], 1, 0 @@ -1464,36 +1468,36 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT]], <3 x float> [[DOTFCA_3_EXTRACT]], 3 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT]], float [[DOTFCA_4_EXTRACT]], 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT]], i64 
[[DOTFCA_5_EXTRACT]], 5 -; DXILCONTPOSTPROCESS-NEXT: [[TMP13:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP13]], [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT]]), !continuation.registercount [[META32]] +; DXILCONTPOSTPROCESS-NEXT: [[TMP14:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP14]], [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT]]), !continuation.registercount [[META32]] ; DXILCONTPOSTPROCESS-NEXT: unreachable ; ; ; DXILCONTPOSTPROCESS-LABEL: define dso_local void @MyIntersectionShaderLargeAttrs.resume.0( -; DXILCONTPOSTPROCESS-SAME: i32 [[TMP0:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP1:%.*]]) !lgc.rt.shaderstage [[META40]] !continuation.registercount [[META32]] !continuation [[META43]] { +; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META40]] !continuation.registercount [[META32]] !continuation [[META43]] { ; DXILCONTPOSTPROCESS-NEXT: entryresume.0: ; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP0]], ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], -8 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP3]], ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP4]] to ptr addrspace(21) -; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP5]], i64 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_0_EXTRACT10:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP1]], 0, 0, 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_1_0_EXTRACT12:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP1]], 0, 1, 0 -; DXILCONTPOSTPROCESS-NEXT: 
[[DOTFCA_1_0_EXTRACT14:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP1]], 1, 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_1_EXTRACT16:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP1]], 1, 1 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_EXTRACT18:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP1]], 2 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_EXTRACT20:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP1]], 3 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_EXTRACT22:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP1]], 4 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_EXTRACT24:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP1]], 5 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], -8 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP2]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_0_EXTRACT10:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 0, 0, 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_1_0_EXTRACT12:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 0, 1, 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_0_EXTRACT14:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 1, 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_1_EXTRACT16:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 1, 1 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_EXTRACT18:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 2 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_EXTRACT20:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 3 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_EXTRACT22:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_EXTRACT24:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 5 ; DXILCONTPOSTPROCESS-NEXT: call void 
@amd.dx.setLocalRootIndex(i32 5) ; DXILCONTPOSTPROCESS-NEXT: [[ISEND_I:%.*]] = call i1 @opaqueIsEnd() -; DXILCONTPOSTPROCESS-NEXT: br i1 [[ISEND_I]], label [[TMP7:%.*]], label [[TMP9:%.*]] -; DXILCONTPOSTPROCESS: 7: -; DXILCONTPOSTPROCESS-NEXT: [[RETURNADDR_RELOAD_ADDR1:%.*]] = getelementptr inbounds [[MYINTERSECTIONSHADERLARGEATTRS_FRAME:%.*]], ptr addrspace(21) [[TMP6]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[RETURNADDR_RELOAD2:%.*]] = load i64, ptr addrspace(21) [[RETURNADDR_RELOAD_ADDR1]], align 4 +; DXILCONTPOSTPROCESS-NEXT: br i1 [[ISEND_I]], label [[TMP5:%.*]], label [[TMP9:%.*]] +; DXILCONTPOSTPROCESS: 5: +; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = inttoptr i32 [[TMP4]] to ptr addrspace(21) +; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP6]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[RETURNADDR_RELOAD2:%.*]] = load i64, ptr addrspace(21) [[TMP7]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_0_INSERT28:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_EXTRACT10]], 0, 0, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_1_0_INSERT31:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT28]], <2 x float> [[DOTFCA_0_1_0_EXTRACT12]], 0, 1, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_0_INSERT34:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT31]], float [[DOTFCA_1_0_EXTRACT14]], 1, 0 @@ -1506,8 +1510,9 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR_RELOAD2]], i32 [[TMP8]], [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT49]]), !continuation.registercount [[META32]] ; DXILCONTPOSTPROCESS-NEXT: unreachable ; DXILCONTPOSTPROCESS: 9: -; DXILCONTPOSTPROCESS-NEXT: [[RETURNADDR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[MYINTERSECTIONSHADERLARGEATTRS_FRAME]], ptr addrspace(21) [[TMP6]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(21) [[RETURNADDR_RELOAD_ADDR]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP4]] to ptr addrspace(21) +; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP10]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(21) [[TMP11]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_EXTRACT10]], 0, 0, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT]], <2 x float> [[DOTFCA_0_1_0_EXTRACT12]], 0, 1, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], float [[DOTFCA_1_0_EXTRACT14]], 1, 0 @@ -1516,13 +1521,13 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT]], <3 x float> [[DOTFCA_3_EXTRACT20]], 3 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT]], float [[DOTFCA_4_EXTRACT22]], 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT]], i64 [[DOTFCA_5_EXTRACT24]], 5 -; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP10]], [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT]]), !continuation.registercount [[META32]] +; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP12]], [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT]]), !continuation.registercount [[META32]] ; DXILCONTPOSTPROCESS-NEXT: unreachable ; ; ; DXILCONTPOSTPROCESS-LABEL: define void @MyMissShader( -; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META44:![0-9]+]] !continuation.registercount [[META35]] !continuation [[META45:![0-9]+]] !continuation.state [[META22]] { +; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META44:![0-9]+]] !continuation.registercount [[META33]] !continuation [[META45:![0-9]+]] !continuation.state [[META22]] { ; DXILCONTPOSTPROCESS-NEXT: AllocaSpillBB: ; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; DXILCONTPOSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 @@ -1555,6 +1560,6 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP12]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison, <3 x i32> [[DOTFCA_0_0_EXTRACT]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[TMP13:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR]], i32 [[TMP13]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META35]] +; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP13]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META33]] ; DXILCONTPOSTPROCESS-NEXT: unreachable ; diff --git a/shared/continuations/test/dx/unnamed-type-intrinsics.ll b/llvmraytracing/test/dx/unnamed-type-intrinsics.ll similarity index 98% rename from shared/continuations/test/dx/unnamed-type-intrinsics.ll rename to llvmraytracing/test/dx/unnamed-type-intrinsics.ll index dc9d7f13b3..f59a8644e1 100644 --- a/shared/continuations/test/dx/unnamed-type-intrinsics.ll +++ b/llvmraytracing/test/dx/unnamed-type-intrinsics.ll @@ -4,7 +4,7 @@ ; Check that using unnamed types works well with generating intrinsic names -target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" +target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" ; struct.DispatchSystemData %0 = type { <3 x i32> } @@ -346,7 +346,7 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; ; ; LOWERRAYTRACINGPIPELINE-LABEL: define void @MyRayGen( -; LOWERRAYTRACINGPIPELINE-SAME: [[TMP0:%.*]] [[TMP0]]) #[[ATTR2:[0-9]+]] !lgc.rt.shaderstage [[META15:![0-9]+]] !continuation.entry [[META20:![0-9]+]] !continuation.registercount [[META15]] !continuation [[META21:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: [[TMP0:%.*]] [[TMP0]]) #[[ATTR2:[0-9]+]] !lgc.rt.shaderstage [[META15:![0-9]+]] !continuation.entry [[META21:![0-9]+]] 
!continuation.registercount [[META15]] !continuation [[META22:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[TMP0]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[TMP0]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) @@ -356,7 +356,7 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = bitcast ptr [[TMP4]] to ptr ; LOWERRAYTRACINGPIPELINE-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[TMP5]]) #[[ATTR1:[0-9]+]] ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP4]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: store <4 x float> zeroinitializer, ptr [[TMP6]], align 4, !tbaa [[TBAA22:![0-9]+]] +; LOWERRAYTRACINGPIPELINE-NEXT: store <4 x float> zeroinitializer, ptr [[TMP6]], align 4, !tbaa [[TBAA23:![0-9]+]] ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP2]]) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP7]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP8]]) @@ -380,7 +380,7 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[TMP14]], i64 2 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP20]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = call ptr inttoptr (i64 4 to ptr)([[TMP1]] [[TRAV_DATA2_I]]), !continuation.registercount 
[[META25:![0-9]+]], !continuation.returnedRegistercount !25 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = call ptr inttoptr (i64 4 to ptr)([[TMP1]] [[TRAV_DATA2_I]]), !continuation.registercount [[META19:![0-9]+]], !continuation.returnedRegistercount !19 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = call [[TMP0]] @await.(ptr [[TMP21]]) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_RAYPAYLOAD]] poison, ptr [[TMP4]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP4]], i32 0, i32 0 @@ -402,7 +402,7 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; LOWERRAYTRACINGPIPELINE-NEXT: br label [[DOTSPLIT:%.*]] ; LOWERRAYTRACINGPIPELINE: .split: -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP34:%.*]] = load <4 x float>, ptr [[TMP6]], align 4, !tbaa [[TBAA22]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP34:%.*]] = load <4 x float>, ptr [[TMP6]], align 4, !tbaa [[TBAA23]] ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP35:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.index() ; LOWERRAYTRACINGPIPELINE-NEXT: [[EXTRACT:%.*]] = extractelement <3 x i32> [[TMP35]], i8 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP36:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.index() @@ -419,7 +419,7 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; ; ; LOWERRAYTRACINGPIPELINE-LABEL: define %0 @MyClosestHit( -; LOWERRAYTRACINGPIPELINE-SAME: [[TMP2:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META26:![0-9]+]] !continuation.registercount [[META25]] !continuation [[META27:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: [[TMP2:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META26:![0-9]+]] !continuation.registercount [[META19]] !continuation [[META27:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: 
[[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[TMP2]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 8 @@ -484,5 +484,5 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP45]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[TMP2]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP47:%.*]] = load [[TMP0]], ptr [[TMP46]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret [[TMP0]] [[TMP47]], !continuation.registercount [[META25]] +; LOWERRAYTRACINGPIPELINE-NEXT: ret [[TMP0]] [[TMP47]], !continuation.registercount [[META19]] ; diff --git a/shared/continuations/test/dx/wrong-system-data.ll b/llvmraytracing/test/dx/wrong-system-data.ll similarity index 98% rename from shared/continuations/test/dx/wrong-system-data.ll rename to llvmraytracing/test/dx/wrong-system-data.ll index 25e49fe391..44f2f6c66b 100644 --- a/shared/continuations/test/dx/wrong-system-data.ll +++ b/llvmraytracing/test/dx/wrong-system-data.ll @@ -2,7 +2,7 @@ ; CHECK: Invalid system data struct: Did not contain the needed struct type -target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" +target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" %dx.types.Handle = type { i8* } %struct.TraversalData = type { %struct.SystemData } diff --git a/shared/continuations/test/intrinsics/discard-values.ll b/llvmraytracing/test/intrinsics/discard-values.ll 
similarity index 100% rename from shared/continuations/test/intrinsics/discard-values.ll rename to llvmraytracing/test/intrinsics/discard-values.ll diff --git a/shared/continuations/test/intrinsics/get-func-addr-not-found.ll b/llvmraytracing/test/intrinsics/get-func-addr-not-found.ll similarity index 100% rename from shared/continuations/test/intrinsics/get-func-addr-not-found.ll rename to llvmraytracing/test/intrinsics/get-func-addr-not-found.ll diff --git a/shared/continuations/test/intrinsics/get-func-addr.ll b/llvmraytracing/test/intrinsics/get-func-addr.ll similarity index 100% rename from shared/continuations/test/intrinsics/get-func-addr.ll rename to llvmraytracing/test/intrinsics/get-func-addr.ll diff --git a/shared/continuations/test/lgccps/alloca-select.ll b/llvmraytracing/test/lgccps/alloca-select.ll similarity index 97% rename from shared/continuations/test/lgccps/alloca-select.ll rename to llvmraytracing/test/lgccps/alloca-select.ll index 6622b1c372..1980f014ec 100644 --- a/shared/continuations/test/lgccps/alloca-select.ll +++ b/llvmraytracing/test/lgccps/alloca-select.ll @@ -10,7 +10,7 @@ define void @test({} %state, i32 %rcr, float %arg, i32 %arg1) !lgc.cps !0 { store i32 111, ptr %p, align 4 %t0 = fadd float %arg, 1.0 %cr = call i32 @lgc.cps.as.continuation.reference(ptr @callee) - %t1 = call float (...) @lgc.cps.await.f32(i32 %cr, i32 2, float %t0) + %t1 = call float (...) @lgc.cps.await__f32(i32 %cr, i32 2, float %t0) %tmp = fmul float %t1, %arg %v111 = load float, ptr %p, align 4 %returnvalue = fmul float %tmp, %v111 @@ -21,7 +21,7 @@ define void @test({} %state, i32 %rcr, float %arg, i32 %arg1) !lgc.cps !0 { !0 = !{i32 1} ; level = 1 declare i32 @lgc.cps.as.continuation.reference(...) memory(none) -declare float @lgc.cps.await.f32(...) +declare float @lgc.cps.await__f32(...) declare void @lgc.cps.jump(...) 
; CHECK-LABEL: define void @test ; CHECK-SAME: ({} [[STATE:%.*]], i32 [[RCR:%.*]], float [[ARG:%.*]], i32 [[ARG1:%.*]]) !lgc.cps [[META0:![0-9]+]] !continuation [[META1:![0-9]+]] { diff --git a/shared/continuations/test/lgccps/await-if-else.ll b/llvmraytracing/test/lgccps/await-if-else.ll similarity index 96% rename from shared/continuations/test/lgccps/await-if-else.ll rename to llvmraytracing/test/lgccps/await-if-else.ll index d68edbb11d..57658fa5b7 100644 --- a/shared/continuations/test/lgccps/await-if-else.ll +++ b/llvmraytracing/test/lgccps/await-if-else.ll @@ -12,11 +12,11 @@ define void @test({} %state, i32 %rcr, float %arg) !lgc.cps !0 { br i1 %cond, label %bb1, label %bb2 bb1: - %t1 = call float (...) @lgc.cps.await.f32(i32 %cr, i32 2, float %arg) + %t1 = call float (...) @lgc.cps.await__f32(i32 %cr, i32 2, float %arg) br label %bb3 bb2: - %t2 = call float (...) @lgc.cps.await.f32(i32 %cr2, i32 2, float %t0) + %t2 = call float (...) @lgc.cps.await__f32(i32 %cr2, i32 2, float %t0) br label %bb3 bb3: %t3 = phi float [%t1, %bb1], [%t2, %bb2] @@ -28,7 +28,7 @@ bb3: !0 = !{i32 1} ; level = 1 declare i32 @lgc.cps.as.continuation.reference(...) memory(none) -declare float @lgc.cps.await.f32(...) +declare float @lgc.cps.await__f32(...) declare void @lgc.cps.jump(...) ; CHECK-LABEL: define void @test( ; CHECK-SAME: {} [[STATE:%.*]], i32 [[RCR:%.*]], float [[ARG:%.*]]) !lgc.cps [[META0:![0-9]+]] !continuation [[META1:![0-9]+]] { diff --git a/shared/continuations/test/lgccps/await-if.ll b/llvmraytracing/test/lgccps/await-if.ll similarity index 96% rename from shared/continuations/test/lgccps/await-if.ll rename to llvmraytracing/test/lgccps/await-if.ll index fed102292e..9ea90a281d 100644 --- a/shared/continuations/test/lgccps/await-if.ll +++ b/llvmraytracing/test/lgccps/await-if.ll @@ -11,7 +11,7 @@ entry: br i1 %cond, label %bb1, label %bb2 bb1: - %t1 = call float (...) @lgc.cps.await.f32(i32 %cr, i32 2, float %arg) + %t1 = call float (...) 
@lgc.cps.await__f32(i32 %cr, i32 2, float %arg) br label %bb2 bb2: @@ -24,7 +24,7 @@ bb2: !0 = !{i32 1} ; level = 1 declare i32 @lgc.cps.as.continuation.reference(...) memory(none) -declare float @lgc.cps.await.f32(...) +declare float @lgc.cps.await__f32(...) declare void @lgc.cps.jump(...) ; CHECK-LABEL: define void @test( ; CHECK-SAME: {} [[STATE:%.*]], i32 [[RCR:%.*]], float [[ARG:%.*]]) !lgc.cps [[META0:![0-9]+]] !continuation [[META1:![0-9]+]] { diff --git a/shared/continuations/test/lgccps/await-in-loop.ll b/llvmraytracing/test/lgccps/await-in-loop.ll similarity index 97% rename from shared/continuations/test/lgccps/await-in-loop.ll rename to llvmraytracing/test/lgccps/await-in-loop.ll index 0a7612aa41..0de4d89173 100644 --- a/shared/continuations/test/lgccps/await-in-loop.ll +++ b/llvmraytracing/test/lgccps/await-in-loop.ll @@ -11,7 +11,7 @@ entry: loop: %ind = phi i32 [0, %entry], [%inc, %loop] - %t1 = call float (...) @lgc.cps.await.f32(i32 %cr, i32 2, i32 %ind) + %t1 = call float (...) @lgc.cps.await__f32(i32 %cr, i32 2, i32 %ind) %inc = add i32 %ind, 1 %cond = fcmp olt float %t1, 5.0 br i1 %cond, label %loop, label %end @@ -26,7 +26,7 @@ end: !0 = !{i32 1} ; level = 1 declare i32 @lgc.cps.as.continuation.reference(...) memory(none) -declare float @lgc.cps.await.f32(...) +declare float @lgc.cps.await__f32(...) declare void @lgc.cps.jump(...) 
; CHECK-LABEL: define void @test( ; CHECK-SAME: {} [[STATE:%.*]], i32 [[RCR:%.*]], float [[ARG:%.*]], float [[ARG2:%.*]]) !lgc.cps [[META0:![0-9]+]] !continuation [[META1:![0-9]+]] { diff --git a/shared/continuations/test/lgccps/cleanup-store-loads.ll b/llvmraytracing/test/lgccps/cleanup-store-loads.ll similarity index 99% rename from shared/continuations/test/lgccps/cleanup-store-loads.ll rename to llvmraytracing/test/lgccps/cleanup-store-loads.ll index 6355707977..37afa18379 100644 --- a/shared/continuations/test/lgccps/cleanup-store-loads.ll +++ b/llvmraytracing/test/lgccps/cleanup-store-loads.ll @@ -164,7 +164,7 @@ bb2: ; preds = %entry define internal { ptr, ptr } @test.resume.0(ptr noalias noundef nonnull align 4 dereferenceable(8) %0, i1 %1) !lgc.cps !0 !continuation !1 { entryresume.0: %2 = load ptr, ptr %0, align 8 - %3 = call float @continuations.getReturnValue.f32() + %3 = call float @continuations.getReturnValue__f32() %arg.reload.addr = getelementptr inbounds %test.Frame, ptr %2, i32 0, i32 1 %arg.reload = load float, ptr %arg.reload.addr, align 4 %rcr.reload.addr = getelementptr inbounds %test.Frame, ptr %2, i32 0, i32 0 @@ -177,7 +177,7 @@ entryresume.0: ; Function Attrs: memory(none) declare i32 @lgc.cps.as.continuation.reference(...) #0 -declare float @lgc.cps.await.f32(...) +declare float @lgc.cps.await__f32(...) declare void @lgc.cps.jump(...) @@ -197,7 +197,7 @@ declare ptr @llvm.coro.begin(token, ptr writeonly) #1 declare i1 @llvm.coro.suspend.retcon.i1(...) #1 ; Function Attrs: nounwind willreturn -declare float @continuations.getReturnValue.f32() #2 +declare float @continuations.getReturnValue__f32() #2 ; Function Attrs: noreturn declare void @continuation.return(...) 
#3 diff --git a/llvmraytracing/test/lgccps/cps-no-await.ll b/llvmraytracing/test/lgccps/cps-no-await.ll new file mode 100644 index 0000000000..f0bd4ee520 --- /dev/null +++ b/llvmraytracing/test/lgccps/cps-no-await.ll @@ -0,0 +1,25 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 3 +; RUN: opt --verify-each -S -o - -passes='lower-await' %s | FileCheck --check-prefixes=LOWER-AWAIT %s + +define void @_cont_Traversal() !lgc.cps !{i32 2} !continuation !{ptr @_cont_Traversal} { + %pushconst = call ptr addrspace(4) @lgc.user.data(i32 32) + %fn = load ptr, ptr addrspace(4) %pushconst + %cr = ptrtoint ptr %fn to i32 + call void (...) @lgc.cps.jump(i32 %cr, i32 2, {} poison, i32 poison) + unreachable +} + +!lgc.cps.module = !{} + +declare ptr addrspace(4) @lgc.user.data(i32) +declare void @lgc.cps.jump(...) +; LOWER-AWAIT-LABEL: define { ptr, ptr } @_cont_Traversal( +; LOWER-AWAIT-SAME: ptr [[TMP0:%.*]]) !lgc.cps [[META0:![0-9]+]] !continuation [[META1:![0-9]+]] { +; LOWER-AWAIT-NEXT: [[TMP2:%.*]] = call token @llvm.coro.id.retcon(i32 8, i32 4, ptr [[TMP0]], ptr @continuation.prototype._cont_Traversal, ptr @continuation.malloc, ptr @continuation.free) +; LOWER-AWAIT-NEXT: [[TMP3:%.*]] = call ptr @llvm.coro.begin(token [[TMP2]], ptr null) +; LOWER-AWAIT-NEXT: [[PUSHCONST:%.*]] = call ptr addrspace(4) @lgc.user.data(i32 32) +; LOWER-AWAIT-NEXT: [[FN:%.*]] = load ptr, ptr addrspace(4) [[PUSHCONST]], align 8 +; LOWER-AWAIT-NEXT: [[CR:%.*]] = ptrtoint ptr [[FN]] to i32 +; LOWER-AWAIT-NEXT: call void (...) 
@lgc.cps.jump(i32 [[CR]], i32 2, {} poison, i32 poison) +; LOWER-AWAIT-NEXT: unreachable +; diff --git a/shared/continuations/test/lgccps/entry-point-with-cps.ll b/llvmraytracing/test/lgccps/entry-point-with-cps.ll similarity index 96% rename from shared/continuations/test/lgccps/entry-point-with-cps.ll rename to llvmraytracing/test/lgccps/entry-point-with-cps.ll index afe4541051..e65f269e35 100644 --- a/shared/continuations/test/lgccps/entry-point-with-cps.ll +++ b/llvmraytracing/test/lgccps/entry-point-with-cps.ll @@ -16,7 +16,7 @@ define spir_func void @raygen({} %state, i32 %rcr) !lgc.shaderstage !{i32 7} !lg %cr.0 = ptrtoint ptr %fn to i32 %cr.1 = or i32 %cr.0, 2 - %r = call [2 x i32] (...) @lgc.cps.await.a2i32(i32 %cr.1, i32 4, i32 %x, ptr addrspace(1) %dst) + %r = call [2 x i32] (...) @lgc.cps.await__a2i32(i32 %cr.1, i32 4, i32 %x, ptr addrspace(1) %dst) store [2 x i32] %r, ptr addrspace(1) %dst @@ -30,7 +30,7 @@ define spir_func void @chs({} %state, i32 %rcr, i32 %x) !lgc.shaderstage !{i32 7 %cr.0 = ptrtoint ptr %fn to i32 %cr.1 = or i32 %cr.0, 1 - %y = call i32 (...) @lgc.cps.await.i32(i32 %cr.1, i32 2, i32 %x) + %y = call i32 (...) @lgc.cps.await__i32(i32 %cr.1, i32 2, i32 %x) call void (...) @lgc.cps.jump(i32 %rcr, i32 5, i32 %y) unreachable @@ -48,7 +48,7 @@ main: %fn = load ptr, ptr addrspace(4) %pushconst %cr.0 = ptrtoint ptr %fn to i32 - call void (...) @lgc.cps.await.isVoid(i32 %cr.0, i32 1, i32 5) + call void (...) @lgc.cps.await__isVoid(i32 %cr.0, i32 1, i32 5) br label %exit @@ -59,9 +59,9 @@ exit: declare ptr addrspace(4) @lgc.user.data(i32) declare <3 x i32> @lgc.shader.input.LocalInvocationId(i32) -declare void @lgc.cps.await.isVoid(...) -declare i32 @lgc.cps.await.i32(...) -declare [2 x i32] @lgc.cps.await.a2i32(...) +declare void @lgc.cps.await__isVoid(...) +declare i32 @lgc.cps.await__i32(...) +declare [2 x i32] @lgc.cps.await__a2i32(...) declare void @lgc.cps.jump(...) 
; CHECK-LABEL: define spir_func void @raygen( ; CHECK-SAME: {} [[STATE:%.*]], i32 [[RCR:%.*]]) !lgc.shaderstage [[META0:![0-9]+]] !lgc.cps [[META1:![0-9]+]] !continuation [[META2:![0-9]+]] { @@ -150,7 +150,7 @@ declare void @lgc.cps.jump(...) ; LOWER-AWAIT-NEXT: [[TMP4:%.*]] = inttoptr i32 [[CR_1]] to ptr ; LOWER-AWAIT-NEXT: [[TMP5:%.*]] = call ptr [[TMP4]](i32 [[CR_1]], i32 4, i32 [[X]], ptr addrspace(1) [[DST]]) ; LOWER-AWAIT-NEXT: [[TMP6:%.*]] = call i1 (...) @llvm.coro.suspend.retcon.i1(ptr [[TMP5]]) -; LOWER-AWAIT-NEXT: [[TMP7:%.*]] = call [2 x i32] @continuations.getReturnValue.a2i32() +; LOWER-AWAIT-NEXT: [[TMP7:%.*]] = call [2 x i32] @continuations.getReturnValue__a2i32() ; LOWER-AWAIT-NEXT: store [2 x i32] [[TMP7]], ptr addrspace(1) [[DST]], align 4 ; LOWER-AWAIT-NEXT: call void (...) @continuation.return() ; LOWER-AWAIT-NEXT: unreachable @@ -167,7 +167,7 @@ declare void @lgc.cps.jump(...) ; LOWER-AWAIT-NEXT: [[TMP4:%.*]] = inttoptr i32 [[CR_1]] to ptr ; LOWER-AWAIT-NEXT: [[TMP5:%.*]] = call ptr [[TMP4]](i32 [[CR_1]], i32 2, i32 [[X]]) ; LOWER-AWAIT-NEXT: [[TMP6:%.*]] = call i1 (...) @llvm.coro.suspend.retcon.i1(ptr [[TMP5]]) -; LOWER-AWAIT-NEXT: [[TMP7:%.*]] = call i32 @continuations.getReturnValue.i32() +; LOWER-AWAIT-NEXT: [[TMP7:%.*]] = call i32 @continuations.getReturnValue__i32() ; LOWER-AWAIT-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RCR]], i32 5, i32 [[TMP7]]) ; LOWER-AWAIT-NEXT: unreachable ; diff --git a/shared/continuations/test/lgccps/multiple-await.ll b/llvmraytracing/test/lgccps/multiple-await.ll similarity index 95% rename from shared/continuations/test/lgccps/multiple-await.ll rename to llvmraytracing/test/lgccps/multiple-await.ll index 2667866e01..ed1d23c7c0 100644 --- a/shared/continuations/test/lgccps/multiple-await.ll +++ b/llvmraytracing/test/lgccps/multiple-await.ll @@ -7,10 +7,10 @@ declare !lgc.cps !0 void @callee2({}, i32, float) define void @test({} %state, i32 %rcr, float %arg, float %arg2) !lgc.cps !0 { %t0 = fadd float %arg, 1.0 %cr = call i32 @lgc.cps.as.continuation.reference(ptr @callee) - %t1 = call float (...) @lgc.cps.await.f32(i32 %cr, i32 2, float %t0) + %t1 = call float (...) @lgc.cps.await__f32(i32 %cr, i32 2, float %t0) %t2 = fmul float %t1, %arg %cr2 = call i32 @lgc.cps.as.continuation.reference(ptr @callee2) - %t3 = call float (...) @lgc.cps.await.f32(i32 %cr2, i32 2, float %t2) + %t3 = call float (...) @lgc.cps.await__f32(i32 %cr2, i32 2, float %t2) %returnvalue = fadd float %t3, %arg2 call void (...) @lgc.cps.jump(i32 %rcr, i32 2, {} poison, i32 poison, float %returnvalue) unreachable @@ -19,7 +19,7 @@ define void @test({} %state, i32 %rcr, float %arg, float %arg2) !lgc.cps !0 { !0 = !{i32 1} ; level = 1 declare i32 @lgc.cps.as.continuation.reference(...) memory(none) -declare float @lgc.cps.await.f32(...) +declare float @lgc.cps.await__f32(...) declare void @lgc.cps.jump(...) 
; CHECK-LABEL: define void @test( ; CHECK-SAME: {} [[STATE:%.*]], i32 [[RCR:%.*]], float [[ARG:%.*]], float [[ARG2:%.*]]) !lgc.cps [[META0:![0-9]+]] !continuation [[META1:![0-9]+]] { diff --git a/shared/continuations/test/lgccps/simple-await-more-state.ll b/llvmraytracing/test/lgccps/simple-await-more-state.ll similarity index 96% rename from shared/continuations/test/lgccps/simple-await-more-state.ll rename to llvmraytracing/test/lgccps/simple-await-more-state.ll index 112f7637e1..16d85e2a25 100644 --- a/shared/continuations/test/lgccps/simple-await-more-state.ll +++ b/llvmraytracing/test/lgccps/simple-await-more-state.ll @@ -6,7 +6,7 @@ declare !lgc.cps !0 void @callee({}, i32, float) define void @test({} %state, i32 %rcr, float %arg, float %arg2) !lgc.cps !0 { %t0 = fadd float %arg, 1.0 %cr = call i32 @lgc.cps.as.continuation.reference(ptr @callee) - %t1 = call float (...) @lgc.cps.await.f32(i32 %cr, i32 2, float %t0) + %t1 = call float (...) @lgc.cps.await__f32(i32 %cr, i32 2, float %t0) %t2 = fmul float %t1, %arg %returnvalue = fadd float %t2, %arg2 call void (...) @lgc.cps.jump(i32 %rcr, i32 2, {} poison, i32 poison, float %returnvalue) @@ -16,7 +16,7 @@ define void @test({} %state, i32 %rcr, float %arg, float %arg2) !lgc.cps !0 { !0 = !{i32 1} ; level = 1 declare i32 @lgc.cps.as.continuation.reference(...) memory(none) -declare float @lgc.cps.await.f32(...) +declare float @lgc.cps.await__f32(...) declare void @lgc.cps.jump(...) 
; CHECK-LABEL: define void @test( ; CHECK-SAME: {} [[STATE:%.*]], i32 [[RCR:%.*]], float [[ARG:%.*]], float [[ARG2:%.*]]) !lgc.cps [[META0:![0-9]+]] !continuation [[META1:![0-9]+]] { diff --git a/shared/continuations/test/lgccps/simple-await.ll b/llvmraytracing/test/lgccps/simple-await.ll similarity index 96% rename from shared/continuations/test/lgccps/simple-await.ll rename to llvmraytracing/test/lgccps/simple-await.ll index 5ab088a2ac..2356a3b3ae 100644 --- a/shared/continuations/test/lgccps/simple-await.ll +++ b/llvmraytracing/test/lgccps/simple-await.ll @@ -6,7 +6,7 @@ declare !lgc.cps !0 void @callee({}, i32, float) define void @test({} %state, i32 %rcr, float %arg) !lgc.cps !0 { %t0 = fadd float %arg, 1.0 %cr = call i32 @lgc.cps.as.continuation.reference(ptr @callee) - %t1 = call float (...) @lgc.cps.await.f32(i32 %cr, i32 2, float %t0) + %t1 = call float (...) @lgc.cps.await__f32(i32 %cr, i32 2, float %t0) %returnvalue = fmul float %t1, %arg call void (...) @lgc.cps.jump(i32 %rcr, i32 2, {} poison, i32 poison, float %returnvalue) unreachable @@ -15,7 +15,7 @@ define void @test({} %state, i32 %rcr, float %arg) !lgc.cps !0 { !0 = !{i32 1} ; level = 1 declare i32 @lgc.cps.as.continuation.reference(...) memory(none) -declare float @lgc.cps.await.f32(...) +declare float @lgc.cps.await__f32(...) declare void @lgc.cps.jump(...) ; CHECK-LABEL: define void @test( ; CHECK-SAME: {} [[STATE:%.*]], i32 [[RCR:%.*]], float [[ARG:%.*]]) !lgc.cps [[META0:![0-9]+]] !continuation [[META1:![0-9]+]] { @@ -56,7 +56,7 @@ declare void @lgc.cps.jump(...) ; LOWER-AWAIT-NEXT: [[TMP4:%.*]] = inttoptr i32 [[CR]] to ptr ; LOWER-AWAIT-NEXT: [[TMP5:%.*]] = call ptr [[TMP4]](i32 [[CR]], i32 2, float [[T0]]) ; LOWER-AWAIT-NEXT: [[TMP6:%.*]] = call i1 (...) 
@llvm.coro.suspend.retcon.i1(ptr [[TMP5]]) -; LOWER-AWAIT-NEXT: [[TMP7:%.*]] = call float @continuations.getReturnValue.f32() +; LOWER-AWAIT-NEXT: [[TMP7:%.*]] = call float @continuations.getReturnValue__f32() ; LOWER-AWAIT-NEXT: [[RETURNVALUE:%.*]] = fmul float [[TMP7]], [[ARG]] ; LOWER-AWAIT-NEXT: call void (...) @lgc.cps.jump(i32 [[RCR]], i32 2, {} poison, i32 poison, float [[RETURNVALUE]]) ; LOWER-AWAIT-NEXT: unreachable diff --git a/shared/continuations/test/lit.cfg.py b/llvmraytracing/test/lit.cfg.py similarity index 97% rename from shared/continuations/test/lit.cfg.py rename to llvmraytracing/test/lit.cfg.py index dc05cda69b..99c6ef3f43 100644 --- a/shared/continuations/test/lit.cfg.py +++ b/llvmraytracing/test/lit.cfg.py @@ -15,7 +15,7 @@ from lit.llvm.subst import ToolSubst # name: The name of this test suite. -config.name = 'CONTINUATIONS' +config.name = 'RAYTRACING' # testFormat: The test format to use to interpret tests. config.test_format = lit.formats.ShTest(not llvm_config.use_lit_shell) diff --git a/shared/continuations/test/lit.site.cfg.py.in b/llvmraytracing/test/lit.site.cfg.py.in similarity index 91% rename from shared/continuations/test/lit.site.cfg.py.in rename to llvmraytracing/test/lit.site.cfg.py.in index 2a7f47a259..abb5e82235 100644 --- a/shared/continuations/test/lit.site.cfg.py.in +++ b/llvmraytracing/test/lit.site.cfg.py.in @@ -24,4 +24,4 @@ import lit.llvm lit.llvm.initialize(lit_config, config) # Let the main config do the real work. -lit_config.load_config(config, "@CONTINUATIONS_TEST_SOURCE_DIR@/lit.cfg.py") +lit_config.load_config(config, "@LLVMRAYTRACING_TEST_SOURCE_DIR@/lit.cfg.py") diff --git a/llvmraytracing/unittests/CMakeLists.txt b/llvmraytracing/unittests/CMakeLists.txt new file mode 100644 index 0000000000..90b490a692 --- /dev/null +++ b/llvmraytracing/unittests/CMakeLists.txt @@ -0,0 +1,42 @@ +# Raytracing Unit tests. +# To execute all unit tests, run: +# cmake --build . 
--target check-llvmraytracing-units + +add_custom_target(LlvmRaytracingUnitTests) +set_target_properties(LlvmRaytracingUnitTests PROPERTIES FOLDER "Raytracing Tests") + +function(add_llvmraytracing_unittest test_dirname) + add_unittest(LlvmRaytracingUnitTests ${test_dirname} ${ARGN}) +endfunction() + +# Add a LIT target to execute all unit tests. +# Required by lit.site.cfg.py.in. +set(LLVMRAYTRACING_UNIT_TEST_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) +set(LLVMRAYTRACING_UNIT_TEST_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) + +# Main config for unit tests. +configure_lit_site_cfg( + ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in + ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg.py + MAIN_CONFIG + ${CMAKE_CURRENT_SOURCE_DIR}/lit.cfg.py +) + +add_lit_testsuite(check-llvmraytracing-units "Running the LLVM Raytracing unit tests" + ${CMAKE_CURRENT_BINARY_DIR} + ${exclude_from_check_all} + DEPENDS + LlvmRaytracingUnitTests +) + +add_llvmraytracing_unittest(LlvmRaytracingUnitTargetTests + RemainingArgumentDwordTests.cpp +) + +target_link_libraries(LlvmRaytracingUnitTargetTests PRIVATE + LLVMRaytracing + LLVMCore + LLVMSupport +) + +set_compiler_options(LlvmRaytracingUnitTargetTests) diff --git a/shared/continuations/unittests/RemainingArgumentDwordTests.cpp b/llvmraytracing/unittests/RemainingArgumentDwordTests.cpp similarity index 96% rename from shared/continuations/unittests/RemainingArgumentDwordTests.cpp rename to llvmraytracing/unittests/RemainingArgumentDwordTests.cpp index 0368540d21..4e06b1c1ee 100644 --- a/shared/continuations/unittests/RemainingArgumentDwordTests.cpp +++ b/llvmraytracing/unittests/RemainingArgumentDwordTests.cpp @@ -69,7 +69,7 @@ using namespace llvm; unsigned dwordCount = lgc::cps::getArgumentDwordCount( \ LLVM_DL(ExpectedCount##TypeName), \ GET_TYPE_INITIALIZER(ExpectedCount##TypeName, TypeName)); \ - EXPECT_EQ(dwordCount, ExpectedCount); \ + EXPECT_EQ(dwordCount, static_cast<unsigned>(ExpectedCount)); \ } // Test the case where we are checking the size of a vector of
elements. @@ -81,7 +81,7 @@ using namespace llvm; LLVM_DL(TestName), \ FixedVectorType::get(GET_TYPE_INITIALIZER(TestName, TypeName), \ NumElements)); \ - EXPECT_EQ(dwordCount, ExpectedCount); \ + EXPECT_EQ(dwordCount, static_cast<unsigned>(ExpectedCount)); \ } // Test the case where we are checking the size of struct of arbitrary elements. @@ -90,7 +90,7 @@ using namespace llvm; DECLARE_LLVM_LOCALS(TestName) \ unsigned dwordCount = lgc::cps::getArgumentDwordCount( \ LLVM_DL(TestName), StructType::get(__VA_ARGS__)); \ - EXPECT_EQ(dwordCount, ExpectedCount); \ + EXPECT_EQ(dwordCount, static_cast<unsigned>(ExpectedCount)); \ } // Test the case where we are checking a list of arbitrary elements. @@ -99,7 +99,7 @@ using namespace llvm; DECLARE_LLVM_LOCALS(TestName) \ unsigned dwordCount = \ lgc::cps::getArgumentDwordCount(LLVM_DL(TestName), {__VA_ARGS__}); \ - EXPECT_EQ(dwordCount, ExpectedCount); \ + EXPECT_EQ(dwordCount, static_cast<unsigned>(ExpectedCount)); \ } TEST_DWORD_COUNT(Int1, 1) diff --git a/shared/continuations/unittests/lit.cfg.py b/llvmraytracing/unittests/lit.cfg.py similarity index 92% rename from shared/continuations/unittests/lit.cfg.py rename to llvmraytracing/unittests/lit.cfg.py index 2990caf314..5ddc2e1474 100644 --- a/shared/continuations/unittests/lit.cfg.py +++ b/llvmraytracing/unittests/lit.cfg.py @@ -4,14 +4,14 @@ import lit.formats # name: The name of this test suite. -config.name = 'Continuations_Unit' +config.name = 'Raytracing_Unit' # suffixes: A list of file extensions to treat as test files. config.suffixes = [] # test_source_root: The root path where tests are located. # test_exec_root: The root path where tests should be run. -config.test_exec_root = config.continuations_unit_test_binary_dir +config.test_exec_root = config.raytracing_unit_test_binary_dir config.test_source_root = config.test_exec_root # testFormat: The test format to use to interpret tests.
diff --git a/shared/continuations/unittests/lit.site.cfg.py.in b/llvmraytracing/unittests/lit.site.cfg.py.in similarity index 81% rename from shared/continuations/unittests/lit.site.cfg.py.in rename to llvmraytracing/unittests/lit.site.cfg.py.in index 1acbb89d4b..8d6e567a9c 100644 --- a/shared/continuations/unittests/lit.site.cfg.py.in +++ b/llvmraytracing/unittests/lit.site.cfg.py.in @@ -6,7 +6,7 @@ config.llvm_src_root = "@LLVM_BUILD_MAIN_SRC_DIR@" config.llvm_obj_root = "@LLVM_BINARY_DIR@" config.llvm_tools_dir = "@LLVM_TOOLS_DIR@" config.llvm_build_mode = "@LLVM_BUILD_MODE@" -config.continuations_unit_test_binary_dir = "@CONTINUATIONS_UNIT_TEST_BINARY_DIR@" +config.raytracing_unit_test_binary_dir = "@LLVMRAYTRACING_UNIT_TEST_BINARY_DIR@" # Support substitution of the tools and libs dirs with user parameters. This is # used when we can't determine the tool dir at configuration time. @@ -19,4 +19,4 @@ except KeyError: lit_config.fatal("unable to find %r parameter, use '--param=%s=VALUE'" % (key,key)) # Let the main config do the real work. -lit_config.load_config(config, "@CONTINUATIONS_UNIT_TEST_SOURCE_DIR@/lit.cfg.py") +lit_config.load_config(config, "@LLVMRAYTRACING_UNIT_TEST_SOURCE_DIR@/lit.cfg.py") diff --git a/shared/README.md b/shared/README.md deleted file mode 100644 index fa8962b633..0000000000 --- a/shared/README.md +++ /dev/null @@ -1 +0,0 @@ -Note: The contents of this directory will eventually be moved to the top level of the repository. 
diff --git a/shared/continuations/CMakeLists.txt b/shared/continuations/CMakeLists.txt deleted file mode 100644 index a715a91ef3..0000000000 --- a/shared/continuations/CMakeLists.txt +++ /dev/null @@ -1,92 +0,0 @@ -cmake_minimum_required(VERSION 3.13.4) - -project(Continuations LANGUAGES CXX) - -function(set_compiler_options PROJECT_NAME) - # Output with color if in terminal: https://github.com/ninja-build/ninja/wiki/FAQ - if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - target_compile_options("${PROJECT_NAME}" PRIVATE -fdiagnostics-color=always) - elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang") - target_compile_options("${PROJECT_NAME}" PRIVATE -fcolor-diagnostics) - endif() -endfunction() - -option(CONTINUATIONS_BUILD_TESTS "Build continuation tests") - -add_llvm_library(LLVMContinuations - lib/CleanupContinuations.cpp - lib/Continuations.cpp - lib/ContinuationsDialect.cpp - lib/CpsStackLowering.cpp - lib/DXILContIntrinsicPrepare.cpp - lib/DXILContLgcRtOpConverter.cpp - lib/DXILContPostProcess.cpp - lib/DXILSupport.cpp - lib/GpurtContext.cpp - lib/GpurtDialect.cpp - lib/LegacyCleanupContinuations.cpp - lib/LgcCpsDialect.cpp - lib/LgcRtDialect.cpp - lib/LowerAwait.cpp - lib/LowerRaytracingPipeline.cpp - lib/PassRegistry.inc - lib/PayloadAccessQualifiers.cpp - lib/RegisterBuffer.cpp - lib/RemoveTypesMetadata.cpp - lib/SaveContinuationState.cpp - lib/TypesMetadata.cpp - - DEPENDS - intrinsics_gen - - LINK_COMPONENTS - Analysis - Core - Coroutines - IPO - Scalar - Support - TransformUtils -) - -target_include_directories(LLVMContinuations PUBLIC - $ - $ - $ -) - -llvm_map_components_to_libnames(extra_llvm_libs CompilerUtils) - -target_link_libraries(LLVMContinuations PUBLIC llvm_dialects ${extra_llvm_libs} llpc_version) -set_compiler_options(LLVMContinuations) - -# TableGen for dialects -set(CONTINUATIONS_TABLEGEN_EXE $) -set(CONTINUATIONS_TABLEGEN_TARGET llvm-dialects-tblgen) - -macro(cont_tablegen DIALECTNAME FILE OUTPUT_FILENAME) - set(LLVM_TARGET_DEFINITIONS "${FILE}") - 
set(TBLGEN_TARGET "${OUTPUT_FILENAME}TableGen") - - tablegen(CONTINUATIONS "${OUTPUT_FILENAME}.h.inc" -gen-dialect-decls --dialect "${DIALECTNAME}" "${CONTINUATIONS_TABLEGEN_DEFINES}" - EXTRA_INCLUDES ${CMAKE_CURRENT_SOURCE_DIR}/../../imported/llvm-dialects/include) - tablegen(CONTINUATIONS "${OUTPUT_FILENAME}.cpp.inc" -gen-dialect-defs --dialect "${DIALECTNAME}" "${CONTINUATIONS_TABLEGEN_DEFINES}" - EXTRA_INCLUDES ${CMAKE_CURRENT_SOURCE_DIR}/../../imported/llvm-dialects/include) - add_public_tablegen_target(${TBLGEN_TARGET}) - - add_dependencies(LLVMContinuations ${TBLGEN_TARGET}) -endmacro() - -cont_tablegen(continuations include/continuations/ContinuationsDialect.td ContinuationsDialect) -cont_tablegen(lgc.cps include/lgc/LgcCpsDialect.td LgcCpsDialect) -cont_tablegen(lgc.rt include/lgc/LgcRtDialect.td LgcRtDialect) -cont_tablegen(lgc.gpurt include/lgc/GpurtDialect.td GpurtDialect) - -target_compile_features(LLVMContinuations PUBLIC cxx_std_17) -set_target_properties(LLVMContinuations PROPERTIES CXX_EXTENSIONS OFF) - -add_subdirectory(plugin) -if(CONTINUATIONS_BUILD_TESTS) - add_subdirectory(test) - add_subdirectory(unittests) -endif() diff --git a/shared/continuations/lib/DXILContIntrinsicPrepare.cpp b/shared/continuations/lib/DXILContIntrinsicPrepare.cpp deleted file mode 100644 index 8598d7a081..0000000000 --- a/shared/continuations/lib/DXILContIntrinsicPrepare.cpp +++ /dev/null @@ -1,352 +0,0 @@ -/* - *********************************************************************************************************************** - * - * Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - *all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - **********************************************************************************************************************/ - -//===- DXILContIntrinsicPrepare.cpp - Change signature of functions -------===// -// -// A pass that prepares driver implemented functions for later use. -// -// This pass unmangles function names and changes sret arguments back to -// return values. 
-// -//===----------------------------------------------------------------------===// - -#include "continuations/Continuations.h" -#include "continuations/ContinuationsUtil.h" -#include "lgc/LgcRtDialect.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Type.h" -#include "llvm/InitializePasses.h" -#include -#include - -using namespace llvm; - -#define DEBUG_TYPE "dxil-cont-intrinsic-prepare" - -DXILContIntrinsicPreparePass::DXILContIntrinsicPreparePass() {} - -/// - Unmangle the function names to be more readable and to prevent confusion -/// with app defined functions later. -/// - Convert sret arguments back to return values -/// - Convert struct pointer arguments to pass structs by value -static Function *transformFunction(Function &F) { - auto Name = F.getName(); - LLVM_DEBUG(dbgs() << "Transforming function " << Name << "\n"); - std::string NewName = Name.str(); - - // Unmangle declarations because they cannot be renamed in the dx api - if (Name.contains('@')) { - // Extract unmangled name - auto Start = Name.find('?') + 1; - auto End = Name.find('@', Start); - if (Start == 0 || End == StringRef::npos || Start > Name.size() || - End > Name.size()) { - report_fatal_error( - Twine("Failed to unmangle function name: Failed to extract from '") + - Name + "' (start: " + Twine(Start) + ", end: " + Twine(End) + ")"); - } - - // Copy name, otherwise it will be deleted before it's set - NewName = Name.substr(Start, End - Start).str(); - } - - LLVM_DEBUG(dbgs() << " Set new name " << NewName << "\n"); - - // Change the return type and arguments - SmallVector AllArgTypes; - - Type *NewRetTy = F.getReturnType(); - - // Unpack the inner type of @class.matrix types - bool UnpackMatrixTy = false; - - if (NewRetTy->isStructTy() && NewRetTy->getStructNumElements() == 1) { - if (Name.contains("ObjectToWorld4x3") || - Name.contains("WorldToObject4x3")) { - NewRetTy = 
NewRetTy->getStructElementType(0); - UnpackMatrixTy = true; - } - } - - if (NewName == "_cont_Traversal") - lgc::rt::setLgcRtShaderStage(&F, lgc::rt::RayTracingShaderStage::Traversal); - else if (NewName == "_cont_KernelEntry") - lgc::rt::setLgcRtShaderStage(&F, - lgc::rt::RayTracingShaderStage::KernelEntry); - - Argument *RetArg = nullptr; - AttributeList FAttrs = F.getAttributes(); - SmallVector ParamAttrs; - - unsigned ArgNo = 0; - for (auto &Arg : F.args()) { - ContArgTy ArgTy = ContArgTy::get(&F, &Arg); - - bool DidHandleArg = false; - - if (Arg.hasStructRetAttr()) { - NewRetTy = Arg.getParamStructRetType(); - RetArg = &Arg; - - DidHandleArg = true; - } else if (Arg.getType()->isPointerTy()) { - StringRef NameRef{NewName}; - if (NameRef.contains("Await") || NameRef.contains("Enqueue") || - NameRef.contains("Traversal") || - (NewName == "_cont_SetTriangleHitAttributes" && - &Arg != F.getArg(0))) { - // Pass argument data as struct instead of as pointer - Type *ElemType = ArgTy.getPointerElementType(); - assert(ElemType && "Unable to resolve pointer type for argument"); - AllArgTypes.emplace_back(ElemType); - ParamAttrs.push_back({}); - - DidHandleArg = true; - } - } - - // Simply add the argument and its type. 
- if (!DidHandleArg) { - AllArgTypes.push_back(ArgTy); - ParamAttrs.push_back(FAttrs.getParamAttrs(ArgNo)); - } - - ArgNo++; - } - - // Create new empty function - ContFuncTy NewFuncTy(NewRetTy, AllArgTypes); - Function *NewFunc = cloneFunctionHeaderWithTypes(F, NewFuncTy, ParamAttrs); - - // Remove old name for the case that the new name is the same - F.setName(""); - NewFunc->setName(NewName); - NewFunc->addFnAttr(Attribute::AlwaysInline); - - // Set external linkage, so the functions don't get removed, even if they are - // never referenced at this point - NewFunc->setLinkage(GlobalValue::LinkageTypes::ExternalLinkage); - - // Transfer code from old function to new function - llvm::moveFunctionBody(F, *NewFunc); - - // Do not insert code on function declarations - std::optional> B; - bool IsDeclaration = NewFunc->empty(); - - if (!IsDeclaration) { - B.emplace(&*NewFunc->getEntryBlock().getFirstNonPHIOrDbgOrAlloca()); - - if (UnpackMatrixTy) { - // Move values of @class.matrix.x.y into return value of unpacked type - // Replace the return instruction with a new one, returning the unpacked - // value - llvm::forEachTerminator( - NewFunc, {Instruction::Ret}, [&](Instruction &Terminator) { - B->SetInsertPoint(&Terminator); - Value *RetExtractVal = - B->CreateExtractValue(Terminator.getOperand(0), {0}); - B->CreateRet(RetExtractVal); - Terminator.eraseFromParent(); - }); - } - } - - unsigned RetArgIdx = 0; - - // Set arg names for new function - for (unsigned Idx = 0, NewIdx = 0; - Idx != F.getFunctionType()->params().size(); ++Idx, ++NewIdx) { - Argument *OldArg = F.getArg(Idx); - if (OldArg == RetArg) { - // Skip return struct - --NewIdx; - RetArgIdx = Idx; - continue; - } - - Argument *Arg = NewFunc->getArg(NewIdx); - Arg->setName(OldArg->getName()); - - if (!IsDeclaration) { - if (Arg->getType() != OldArg->getType()) { - // Replace pointer argument with alloca - auto *Ty = Arg->getType(); - B->SetInsertPoint( - 
&*NewFunc->getEntryBlock().getFirstNonPHIOrDbgOrAlloca()); - auto *NewArg = B->CreateAlloca(Ty); - B->CreateStore(Arg, NewArg); - B->SetInsertPoint(NewArg); - OldArg->replaceAllUsesWith(NewArg); - } else { - OldArg->replaceAllUsesWith(Arg); - } - } - - if (OldArg->hasInRegAttr()) - Arg->addAttr(Attribute::InReg); - else - Arg->removeAttr(Attribute::AttrKind::InReg); - } - - if (RetArg && !IsDeclaration) { - // Replace sret argument with real return value - B->SetInsertPoint(&*NewFunc->getEntryBlock().getFirstNonPHIOrDbgOrAlloca()); - auto *RetAlloca = B->CreateAlloca(NewRetTy); - RetArg->replaceAllUsesWith(RetAlloca); - - // Replace returns with return value - llvm::forEachTerminator( - NewFunc, {Instruction::Ret}, [&](Instruction &Terminator) { - B->SetInsertPoint(&Terminator); - Value *RetLoad = B->CreateLoad(NewRetTy, RetAlloca); - B->CreateRet(RetLoad); - Terminator.eraseFromParent(); - }); - } - - // Replace all calls - SmallVector Uses; - llvm::forEachCall(F, [&](CallInst &CInst) { Uses.push_back(&CInst); }); - - for (auto *CInst : Uses) { - if (!B) - B.emplace(CInst); - else - B->SetInsertPoint(CInst); - - SmallVector Args; - Value *RetValue = nullptr; - for (unsigned Idx = 0; Idx != CInst->arg_size(); ++Idx) { - auto *Arg = CInst->getArgOperand(Idx); - auto *ArgTy = NewFunc->getArg(Args.size())->getType(); - if (RetArg && RetArgIdx == Idx) { - RetValue = Arg; - } else if (Arg->getType() != ArgTy && Arg->getType()->isPointerTy()) { - auto *Val = B->CreateLoad(ArgTy, Arg); - Args.push_back(Val); - } else { - Args.push_back(Arg); - } - } - - auto *NewCall = B->CreateCall(NewFunc, Args); - if (RetValue) - B->CreateStore(NewCall, RetValue); - - if (!CInst->getType()->isVoidTy()) - CInst->replaceAllUsesWith(NewCall); - CInst->eraseFromParent(); - } - - // Remove the old function - F.replaceAllUsesWith(ConstantExpr::getBitCast(NewFunc, F.getType())); - F.eraseFromParent(); - return NewFunc; -} - -static bool isGpuRtFuncName(StringRef Name) { - for (const auto 
&Intr : LgcRtGpuRtMap) { - if (Name.contains(Intr.second.Name)) - return true; - } - - return false; -} - -static bool isUtilFunction(StringRef Name) { - static const char *UtilNames[] = { - "AcceptHit", - "Await", - "Complete", - "ContinuationStackIsGlobal", - "ContStackAlloc", - "Enqueue", // To detect the mangled name of a declaration - "GetI32", - "GetCandidateState", - "GetCommittedState", - "GetContinuationStackAddr", - "GetContinuationStackGlobalMemBase", - "GetCurrentFuncAddr", - "GetFuncAddr", - "GetLocalRootIndex", - "GetResumePointAddr", - "GetRtip", - "GetShaderKind", - "GetTriangleHitAttributes", - "GetUninitialized", - "I32Count", - "IsEndSearch", - "KernelEntry", - "ReportHit", - "RestoreSystemData", - "SetI32", - "SetTriangleHitAttributes", - "SetupRayGen", - "TraceRay", - "Traversal", - }; - - for (const char *UtilName : UtilNames) { - if (Name.contains(UtilName)) - return true; - } - - return false; -} - -llvm::PreservedAnalyses DXILContIntrinsicPreparePass::run( - llvm::Module &M, llvm::ModuleAnalysisManager &AnalysisManager) { - LLVM_DEBUG(dbgs() << "Run the dxil-cont-intrinsic-prepare pass\n"); - - SmallVector Funcs(make_pointer_range(M.functions())); - - for (auto *F : Funcs) { - auto Name = F->getName(); - bool IsContImpl = Name.contains("_cont_"); - bool ShouldTransform = false; - - if (IsContImpl) { - if (isGpuRtFuncName(Name)) - ShouldTransform = true; - else if (isUtilFunction(Name)) - ShouldTransform = true; - } else if (Name.contains("_Amd") && isUtilFunction(Name)) { - ShouldTransform = true; - } - - if (ShouldTransform) - transformFunction(*F); - } - - fixupDxilMetadata(M); - - earlyDriverTransform(M); - - return PreservedAnalyses::none(); -} diff --git a/shared/continuations/lib/SaveContinuationState.cpp b/shared/continuations/lib/SaveContinuationState.cpp deleted file mode 100644 index a350484fb7..0000000000 --- a/shared/continuations/lib/SaveContinuationState.cpp +++ /dev/null @@ -1,106 +0,0 @@ -/* - 
*********************************************************************************************************************** - * - * Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - *all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - **********************************************************************************************************************/ - -//===- SaveContinuationState.cpp - Callee-save continuation state ---------===// -// -// This pass replaces all uses of continuation.getContinuationStackOffset with a -// local variable and inits the stack pointer in entry functions with -// continuation.initialContinuationStackPtr. -// -// TODO: This pass used to handle a lot more regarding continuation state, -// and now only lowering of the CSP remains. The pass is now poorly named, -// and a later patch might completely remove this pass once CSP lowering -// is moved elsewhere. 
-// -//===----------------------------------------------------------------------===// - -#include "continuations/Continuations.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Type.h" -#include "llvm/InitializePasses.h" -#include - -using namespace llvm; - -#define DEBUG_TYPE "save-continuation-state" - -void SaveContinuationStatePass::lowerCsp(Function *Intr) { - DenseMap> ToProcess; - for (auto *U : Intr->users()) { - if (auto *Inst = dyn_cast(U)) { - auto *F = Inst->getFunction(); - ToProcess[F].push_back(Inst); - } - } - - for (const auto &P : ToProcess) { - auto *F = P.first; - B->SetInsertPointPastAllocas(F); - auto *CspType = getContinuationStackOffsetType(F->getContext()); - auto *Csp = B->CreateAlloca(CspType); - Csp->setName("csp"); - bool IsEntry = F->hasMetadata(ContHelper::MDEntryName); - if (IsEntry) { - // Init csp through intrinsic - auto *Init = getContinuationCspInit(*F->getParent()); - B->CreateStore(B->CreateCall(Init), Csp); - } else { - // Init csp from first argument - B->CreateStore(F->getArg(0), Csp); - } - - for (auto *Call : P.second) { - Call->replaceAllUsesWith(Csp); - Call->eraseFromParent(); - } - } -} - -llvm::PreservedAnalyses -SaveContinuationStatePass::run(llvm::Module &M, - llvm::ModuleAnalysisManager &AnalysisManager) { - LLVM_DEBUG(dbgs() << "Run the save-continuation-state pass\n"); - - bool Changed = false; - - IRBuilder<> Builder(M.getContext()); - I32 = Builder.getInt32Ty(); - B = &Builder; - Mod = &M; - - if (auto *Intr = M.getFunction("continuation.getContinuationStackOffset")) { - Changed = true; - lowerCsp(Intr); - } - - B = nullptr; - - if (Changed) - return PreservedAnalyses::none(); - return PreservedAnalyses::all(); -} diff --git a/shared/continuations/plugin/CMakeLists.txt b/shared/continuations/plugin/CMakeLists.txt deleted file mode 100644 index 954d83df46..0000000000 --- 
a/shared/continuations/plugin/CMakeLists.txt +++ /dev/null @@ -1,11 +0,0 @@ -set(LLVM_CONTINUATIONSPLUGIN_LINK_INTO_TOOLS ON CACHE BOOL "Link plugin into tools" FORCE) - -add_llvm_pass_plugin(ContinuationsPlugin - Plugin.cpp - - LINK_COMPONENTS - Support -) - -target_link_libraries(ContinuationsPlugin PRIVATE LLVMContinuations) -set_compiler_options(ContinuationsPlugin) diff --git a/shared/continuations/test/CMakeLists.txt b/shared/continuations/test/CMakeLists.txt deleted file mode 100644 index aa3e5585bc..0000000000 --- a/shared/continuations/test/CMakeLists.txt +++ /dev/null @@ -1,27 +0,0 @@ -set(CONTINUATIONS_TEST_DEPENDS opt FileCheck count not) -add_custom_target(continuations-test-depends DEPENDS ${CONTINUATIONS_TEST_DEPENDS}) -set_target_properties(continuations-test-depends PROPERTIES FOLDER "Tests") - -# required by lit.site.cfg.py.in -set(CONTINUATIONS_TEST_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) - -# required by configure_lit_site_cfg -set(LLVM_LIT_OUTPUT_DIR ${LLVM_TOOLS_BINARY_DIR}) -configure_lit_site_cfg( - ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in - ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg.py - MAIN_CONFIG - ${CMAKE_CURRENT_SOURCE_DIR}/lit.cfg.py -) - -add_lit_testsuite(check-continuations "Running the continuations regression tests" - ${CMAKE_CURRENT_BINARY_DIR} - ${exclude_from_check_all} - DEPENDS ${CONTINUATIONS_TEST_DEPENDS} -) -set_target_properties(check-continuations PROPERTIES FOLDER "Tests") - -add_lit_testsuites(CONTINUATIONS ${CMAKE_CURRENT_SOURCE_DIR} - ${exclude_from_check_all} - DEPENDS ${CONTINUATIONS_TEST_DEPENDS} -) diff --git a/shared/continuations/test/dx/cleanup-continuations-malloc.ll b/shared/continuations/test/dx/cleanup-continuations-malloc.ll deleted file mode 100644 index c878e8f543..0000000000 --- a/shared/continuations/test/dx/cleanup-continuations-malloc.ll +++ /dev/null @@ -1,69 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 -; RUN: opt --verify-each 
-passes='lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint' -S %s 2> %t.stderr | FileCheck %s -; RUN: count 0 < %t.stderr - -target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" - -%continuation.token = type { } - -declare void @await.void(%continuation.token*) -declare %continuation.token* @async_fun() - -define <4 x i32> @simple_await(<4 x i32> %arg) !continuation.registercount !1 { -; CHECK-LABEL: define void @simple_await( -; CHECK-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], <4 x i32> [[ARG:%.*]]) !continuation.registercount [[META1:![0-9]+]] !continuation [[META2:![0-9]+]] !continuation.state [[META3:![0-9]+]] !continuation.stacksize [[META3]] { -; CHECK-NEXT: AllocaSpillBB: -; CHECK-NEXT: [[TMP0:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = inttoptr i32 [[TMP1]] to ptr addrspace(21) -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP2]], i64 0 -; CHECK-NEXT: [[ARG_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME:%.*]], ptr addrspace(21) [[TMP3]], i32 0, i32 0 -; CHECK-NEXT: store <4 x i32> [[ARG]], ptr addrspace(21) [[ARG_SPILL_ADDR]], align 4 -; CHECK-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME]], ptr addrspace(21) [[TMP3]], i32 0, i32 1 -; CHECK-NEXT: store i64 [[RETURNADDR]], ptr addrspace(21) [[RETURNADDR_SPILL_ADDR]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], 24 -; CHECK-NEXT: store i32 [[TMP6]], ptr [[TMP4]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = call ptr 
@continuation.getContinuationStackOffset() -; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -; CHECK-NEXT: call void (i64, ...) @continuation.continue(i64 ptrtoint (ptr @async_fun to i64), i32 [[TMP8]], i64 ptrtoint (ptr @simple_await.resume.0 to i64)), !continuation.registercount [[META1]], !continuation.returnedRegistercount !1 -; CHECK-NEXT: unreachable -; - %tok = call %continuation.token* @async_fun(), !continuation.registercount !1, !continuation.returnedRegistercount !1 - call void @await.void(%continuation.token* %tok) - ret <4 x i32> %arg, !continuation.registercount !1 -} - -define void @simple_await_entry(<4 x i32> %arg, <4 x i32> addrspace(1)* %mem) !continuation.entry !0 !continuation.registercount !1 { -; CHECK-LABEL: define void @simple_await_entry( -; CHECK-SAME: <4 x i32> [[ARG:%.*]], ptr addrspace(1) [[MEM:%.*]]) !continuation.registercount [[META1]] !continuation.entry [[META4:![0-9]+]] !continuation [[META5:![0-9]+]] !continuation.state [[META3]] !continuation.stacksize [[META3]] { -; CHECK-NEXT: AllocaSpillBB: -; CHECK-NEXT: [[TMP0:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = inttoptr i32 [[TMP1]] to ptr addrspace(21) -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP2]], i64 0 -; CHECK-NEXT: [[MEM_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_ENTRY_FRAME:%.*]], ptr addrspace(21) [[TMP3]], i32 0, i32 1 -; CHECK-NEXT: store ptr addrspace(1) [[MEM]], ptr addrspace(21) [[MEM_SPILL_ADDR]], align 4 -; CHECK-NEXT: [[ARG_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_ENTRY_FRAME]], ptr addrspace(21) [[TMP3]], i32 0, i32 0 -; CHECK-NEXT: store <4 x i32> [[ARG]], ptr addrspace(21) [[ARG_SPILL_ADDR]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = add i32 
[[TMP5]], 24 -; CHECK-NEXT: store i32 [[TMP6]], ptr [[TMP4]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -; CHECK-NEXT: call void (i64, ...) @continuation.continue(i64 ptrtoint (ptr @async_fun to i64), i32 [[TMP8]], i64 ptrtoint (ptr @simple_await_entry.resume.0 to i64)), !continuation.registercount [[META1]], !continuation.returnedRegistercount !1 -; CHECK-NEXT: unreachable -; - %tok = call %continuation.token* @async_fun(), !continuation.registercount !1, !continuation.returnedRegistercount !1 - call void @await.void(%continuation.token* %tok) - store <4 x i32> %arg, <4 x i32> addrspace(1)* %mem - ret void, !continuation.registercount !1 -} - -!continuation.stackAddrspace = !{!2} - -!0 = !{} -!1 = !{i32 0} -!2 = !{i32 21} diff --git a/shared/continuations/test/dx/cleanup-continuations.ll b/shared/continuations/test/dx/cleanup-continuations.ll deleted file mode 100644 index 50c5dc0cd9..0000000000 --- a/shared/continuations/test/dx/cleanup-continuations.ll +++ /dev/null @@ -1,204 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals --version 3 -; RUN: opt --verify-each -passes='legacy-cleanup-continuations,lint' -S %s 2> %t.stderr | FileCheck %s -; RUN: count 0 < %t.stderr - -target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" - -%continuation.token = type { } -%await_with_ret_value.Frame = type { i64 } -%simple_await.Frame = type { i64 } -%simple_await_entry.Frame = type { } - -declare %continuation.token* @async_fun() -declare i32 @continuations.getReturnValue.i32() #0 -declare void @continuation.return(i64, ...) 
- -define { i8*, %continuation.token* } @simple_await(i8* %0) !continuation !0 !continuation.registercount !4 { -; CHECK-LABEL: define void @simple_await( -; CHECK-SAME: ) !continuation [[META1:![0-9]+]] !continuation.registercount [[META2:![0-9]+]] !continuation.state [[META3:![0-9]+]] !continuation.stacksize [[META3]] { -; CHECK-NEXT: AllocaSpillBB: -; CHECK-NEXT: [[TMP0:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = inttoptr i32 [[TMP1]] to ptr addrspace(21) -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP2]], i64 0 -; CHECK-NEXT: [[FRAMEPTR:%.*]] = bitcast ptr addrspace(21) [[TMP3]] to ptr addrspace(21) -; CHECK-NEXT: [[DOTSPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME:%.*]], ptr addrspace(21) [[FRAMEPTR]], i32 0, i32 0 -; CHECK-NEXT: store i64 -1, ptr addrspace(21) [[DOTSPILL_ADDR]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], 8 -; CHECK-NEXT: store i32 [[TMP6]], ptr [[TMP4]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -; CHECK-NEXT: call void (i64, ...) 
@continuation.continue(i64 ptrtoint (ptr @async_fun to i64), i32 [[TMP8]], i64 ptrtoint (ptr @simple_await.resume.0 to i64)), !continuation.registercount [[META2]], !continuation.returnedRegistercount !2 -; CHECK-NEXT: unreachable -; -AllocaSpillBB: - %FramePtr = bitcast i8* %0 to %simple_await.Frame* - %.spill.addr = getelementptr inbounds %simple_await.Frame, %simple_await.Frame* %FramePtr, i32 0, i32 0 - store i64 -1, i64* %.spill.addr, align 4 - %tok = call %continuation.token* @async_fun(), !continuation.registercount !4, !continuation.returnedRegistercount !4 - %1 = insertvalue { i8*, %continuation.token* } { i8* bitcast ({ i8*, %continuation.token* } (i8*, i1)* @simple_await.resume.0 to i8*), %continuation.token* undef }, %continuation.token* %tok, 1 - ret { i8*, %continuation.token* } %1 -} - -define internal { i8*, %continuation.token* } @simple_await.resume.0(i8* noalias nonnull align 16 dereferenceable(8) %0, i1 %1) !continuation !0 { -; CHECK-LABEL: define dso_local void @simple_await.resume.0( -; CHECK-SAME: i32 [[TMP0:%.*]]) !continuation [[META1]] !continuation.registercount [[META2]] { -; CHECK-NEXT: entryresume.0: -; CHECK-NEXT: [[TMP1:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], -8 -; CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP1]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i32 [[TMP5]] to ptr addrspace(21) -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP6]], i64 0 -; CHECK-NEXT: [[FRAMEPTR:%.*]] = bitcast ptr addrspace(21) [[TMP7]] to ptr addrspace(21) -; CHECK-NEXT: [[VFRAME:%.*]] = bitcast ptr addrspace(21) [[FRAMEPTR]] to ptr addrspace(21) -; CHECK-NEXT: [[DOTRELOAD_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME:%.*]], ptr addrspace(21) 
[[FRAMEPTR]], i32 0, i32 0 -; CHECK-NEXT: [[DOTRELOAD:%.*]] = load i64, ptr addrspace(21) [[DOTRELOAD_ADDR]], align 4 -; CHECK-NEXT: [[TMP8:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -; CHECK-NEXT: call void (i64, ...) @continuation.continue(i64 [[DOTRELOAD]], i32 [[TMP9]]), !continuation.registercount [[META2]] -; CHECK-NEXT: unreachable -; -entryresume.0: - %FramePtr = bitcast i8* %0 to %simple_await.Frame* - %vFrame = bitcast %simple_await.Frame* %FramePtr to i8* - %.reload.addr = getelementptr inbounds %simple_await.Frame, %simple_await.Frame* %FramePtr, i32 0, i32 0 - %.reload = load i64, i64* %.reload.addr, align 4 - call void (i64, ...) @continuation.return(i64 %.reload), !continuation.registercount !4 - unreachable -} - -define { i8*, %continuation.token* } @simple_await_entry(i8* %0) !continuation.entry !2 !continuation !3 !continuation.registercount !4 { -; CHECK-LABEL: define void @simple_await_entry( -; CHECK-SAME: ) !continuation [[META4:![0-9]+]] !continuation.registercount [[META2]] !continuation.entry [[META5:![0-9]+]] !continuation.state [[META3]] !continuation.stacksize [[META3]] { -; CHECK-NEXT: AllocaSpillBB: -; CHECK-NEXT: [[TMP0:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = inttoptr i32 [[TMP1]] to ptr addrspace(21) -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP2]], i64 0 -; CHECK-NEXT: [[FRAMEPTR:%.*]] = bitcast ptr addrspace(21) [[TMP3]] to ptr addrspace(21) -; CHECK-NEXT: [[TMP4:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], 8 -; CHECK-NEXT: store i32 [[TMP6]], ptr [[TMP4]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr 
[[TMP7]], align 4 -; CHECK-NEXT: call void (i64, ...) @continuation.continue(i64 ptrtoint (ptr @async_fun to i64), i32 [[TMP8]], i64 ptrtoint (ptr @simple_await_entry.resume.0 to i64)), !continuation.registercount [[META2]], !continuation.returnedRegistercount !2 -; CHECK-NEXT: unreachable -; -AllocaSpillBB: - %FramePtr = bitcast i8* %0 to %simple_await_entry.Frame* - %tok = call %continuation.token* @async_fun(), !continuation.registercount !4, !continuation.returnedRegistercount !4 - %1 = bitcast { i8*, %continuation.token* } (i8*, i1)* @simple_await_entry.resume.0 to i8* - %2 = insertvalue { i8*, %continuation.token* } undef, i8* %1, 0 - %3 = insertvalue { i8*, %continuation.token* } %2, %continuation.token* %tok, 1 - ret { i8*, %continuation.token* } %3 -} - -define internal { i8*, %continuation.token* } @simple_await_entry.resume.0(i8* noalias nonnull align 16 dereferenceable(8) %0, i1 %1) !continuation.entry !2 !continuation !3 { -; CHECK-LABEL: define dso_local void @simple_await_entry.resume.0( -; CHECK-SAME: i32 [[TMP0:%.*]]) !continuation [[META4]] !continuation.registercount [[META2]] { -; CHECK-NEXT: entryresume.0: -; CHECK-NEXT: [[TMP1:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], -8 -; CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP1]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i32 [[TMP5]] to ptr addrspace(21) -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP6]], i64 0 -; CHECK-NEXT: [[FRAMEPTR:%.*]] = bitcast ptr addrspace(21) [[TMP7]] to ptr addrspace(21) -; CHECK-NEXT: [[VFRAME:%.*]] = bitcast ptr addrspace(21) [[FRAMEPTR]] to ptr addrspace(21) -; CHECK-NEXT: call void @continuation.complete() -; CHECK-NEXT: unreachable -; -entryresume.0: - %FramePtr = bitcast i8* 
%0 to %simple_await_entry.Frame* - %vFrame = bitcast %simple_await_entry.Frame* %FramePtr to i8* - call void (i64, ...) @continuation.return(i64 undef), !continuation.registercount !4 - unreachable -} - -define { i8*, %continuation.token* } @await_with_ret_value(i8* %0) !continuation !1 !continuation.registercount !4 { -; CHECK-LABEL: define void @await_with_ret_value( -; CHECK-SAME: ) !continuation [[META6:![0-9]+]] !continuation.registercount [[META2]] !continuation.state [[META3]] !continuation.stacksize [[META3]] { -; CHECK-NEXT: [[TMP1:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(21) -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP3]], i64 0 -; CHECK-NEXT: [[FRAMEPTR:%.*]] = bitcast ptr addrspace(21) [[TMP4]] to ptr addrspace(21) -; CHECK-NEXT: [[DOTSPILL_ADDR:%.*]] = getelementptr inbounds [[AWAIT_WITH_RET_VALUE_FRAME:%.*]], ptr addrspace(21) [[FRAMEPTR]], i32 0, i32 0 -; CHECK-NEXT: store i64 -1, ptr addrspace(21) [[DOTSPILL_ADDR]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], 8 -; CHECK-NEXT: store i32 [[TMP7]], ptr [[TMP5]], align 4 -; CHECK-NEXT: [[TMP8:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -; CHECK-NEXT: call void (i64, ...) 
@continuation.continue(i64 ptrtoint (ptr @async_fun to i64), i32 [[TMP9]], i64 ptrtoint (ptr @await_with_ret_value.resume.0 to i64)), !continuation.registercount [[META2]], !continuation.returnedRegistercount !2 -; CHECK-NEXT: unreachable -; - %FramePtr = bitcast i8* %0 to %await_with_ret_value.Frame* - %.spill.addr = getelementptr inbounds %await_with_ret_value.Frame, %await_with_ret_value.Frame* %FramePtr, i32 0, i32 0 - store i64 -1, i64* %.spill.addr, align 4 - %tok = call %continuation.token* @async_fun(), !continuation.registercount !4, !continuation.returnedRegistercount !4 - %res = insertvalue { i8*, %continuation.token* } { i8* bitcast ({ i8*, %continuation.token* } (i8*, i1)* @await_with_ret_value.resume.0 to i8*), %continuation.token* undef }, %continuation.token* %tok, 1 - ret { i8*, %continuation.token* } %res -} - -define internal { i8*, %continuation.token* } @await_with_ret_value.resume.0(i8* noalias nonnull align 16 dereferenceable(8) %0, i1 %1) !continuation !1 { -; CHECK-LABEL: define dso_local void @await_with_ret_value.resume.0( -; CHECK-SAME: i32 [[TMP0:%.*]], i32 [[RES1:%.*]]) !continuation [[META6]] !continuation.registercount [[META2]] { -; CHECK-NEXT: [[TMP2:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], -8 -; CHECK-NEXT: store i32 [[TMP4]], ptr [[TMP2]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i32 [[TMP6]] to ptr addrspace(21) -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP7]], i64 0 -; CHECK-NEXT: [[FRAMEPTR:%.*]] = bitcast ptr addrspace(21) [[TMP8]] to ptr addrspace(21) -; CHECK-NEXT: [[VFRAME:%.*]] = bitcast ptr addrspace(21) [[FRAMEPTR]] to ptr addrspace(21) -; CHECK-NEXT: [[DOTRELOAD_ADDR:%.*]] = getelementptr inbounds 
[[AWAIT_WITH_RET_VALUE_FRAME:%.*]], ptr addrspace(21) [[FRAMEPTR]], i32 0, i32 0 -; CHECK-NEXT: [[DOTRELOAD:%.*]] = load i64, ptr addrspace(21) [[DOTRELOAD_ADDR]], align 4 -; CHECK-NEXT: [[TMP9:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -; CHECK-NEXT: call void (i64, ...) @continuation.continue(i64 [[DOTRELOAD]], i32 [[TMP10]], i32 [[RES1]]), !continuation.registercount [[META2]] -; CHECK-NEXT: unreachable -; - %FramePtr = bitcast i8* %0 to %await_with_ret_value.Frame* - %vFrame = bitcast %await_with_ret_value.Frame* %FramePtr to i8* - %.reload.addr = getelementptr inbounds %await_with_ret_value.Frame, %await_with_ret_value.Frame* %FramePtr, i32 0, i32 0 - %.reload = load i64, i64* %.reload.addr, align 4 - %res = call i32 @continuations.getReturnValue.i32() - call void (i64, ...) @continuation.return(i64 %.reload, i32 %res), !continuation.registercount !4 - unreachable -} - -attributes #0 = { nounwind } - -!continuation.stackAddrspace = !{!5} - -!0 = !{{ i8*, %continuation.token* } (i8*)* @simple_await} -!1 = !{{ i8*, %continuation.token* } (i8*)* @await_with_ret_value} -!2 = !{} -!3 = !{{ i8*, %continuation.token* } (i8*)* @simple_await_entry} -!4 = !{i32 0} -!5 = !{i32 21} -;. -; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind } -; CHECK: attributes #[[ATTR1:[0-9]+]] = { noreturn } -; CHECK: attributes #[[ATTR2:[0-9]+]] = { nofree norecurse nosync nounwind speculatable willreturn memory(none) } -;. -; CHECK: [[META0:![0-9]+]] = !{i32 21} -; CHECK: [[META1]] = !{ptr @simple_await} -; CHECK: [[META2]] = !{i32 0} -; CHECK: [[META3]] = !{i32 8} -; CHECK: [[META4]] = !{ptr @simple_await_entry} -; CHECK: [[META5]] = !{} -; CHECK: [[META6]] = !{ptr @await_with_ret_value} -;. 
diff --git a/shared/continuations/test/dx/continuation-state.ll b/shared/continuations/test/dx/continuation-state.ll deleted file mode 100644 index a64776f5e3..0000000000 --- a/shared/continuations/test/dx/continuation-state.ll +++ /dev/null @@ -1,204 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 3 -; RUN: opt --verify-each -passes='lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint' -S %s 2> %t0.stderr | FileCheck -check-prefix=CLEANUP %s -; RUN: count 0 < %t0.stderr -; RUN: opt --verify-each -passes='lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,save-continuation-state,lint,dxil-cont-post-process,lint' \ -; RUN: -S %s 2> %t1.stderr | FileCheck -check-prefix=POST-PROCESS %s -; RUN: count 0 < %t1.stderr - -target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" - -%continuation.token = type { } - -declare void @await.void(%continuation.token*) -declare i32 @_cont_GetContinuationStackAddr() -declare %continuation.token* @async_fun() - -@PAYLOAD = external addrspace(20) global [30 x i32] - -define <4 x i32> @simple_await(<4 x i32> %arg) !continuation.registercount !1 { - %tok = call %continuation.token* @async_fun(), !continuation.registercount !1, !continuation.returnedRegistercount !1 - call void @await.void(%continuation.token* %tok) - ret <4 x i32> %arg, !continuation.registercount !1 -} - -define void @simple_await_entry(<4 x i32> %arg, <4 x i32> addrspace(1)* %mem) !continuation.entry !0 !continuation.registercount !1 { - %tok = call %continuation.token* @async_fun(), !continuation.registercount !1, !continuation.returnedRegistercount !1 - call void @await.void(%continuation.token* 
%tok) - store <4 x i32> %arg, <4 x i32> addrspace(1)* %mem - ret void, !continuation.registercount !1 -} - -!continuation.maxPayloadRegisterCount = !{!2} -!continuation.stackAddrspace = !{!3} - -!0 = !{} -!1 = !{i32 0} -!2 = !{i32 30} -!3 = !{i32 21} -; CLEANUP-LABEL: define void @simple_await( -; CLEANUP-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], <4 x i32> [[ARG:%.*]]) !continuation.registercount [[META2:![0-9]+]] !continuation [[META3:![0-9]+]] !continuation.state [[META4:![0-9]+]] !continuation.stacksize [[META4]] { -; CLEANUP-NEXT: AllocaSpillBB: -; CLEANUP-NEXT: [[TMP0:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CLEANUP-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -; CLEANUP-NEXT: [[TMP2:%.*]] = inttoptr i32 [[TMP1]] to ptr addrspace(21) -; CLEANUP-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP2]], i64 0 -; CLEANUP-NEXT: [[ARG_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME:%.*]], ptr addrspace(21) [[TMP3]], i32 0, i32 0 -; CLEANUP-NEXT: store <4 x i32> [[ARG]], ptr addrspace(21) [[ARG_SPILL_ADDR]], align 4 -; CLEANUP-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME]], ptr addrspace(21) [[TMP3]], i32 0, i32 1 -; CLEANUP-NEXT: store i64 [[RETURNADDR]], ptr addrspace(21) [[RETURNADDR_SPILL_ADDR]], align 4 -; CLEANUP-NEXT: [[TMP4:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CLEANUP-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -; CLEANUP-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], 24 -; CLEANUP-NEXT: store i32 [[TMP6]], ptr [[TMP4]], align 4 -; CLEANUP-NEXT: [[TMP7:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CLEANUP-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -; CLEANUP-NEXT: call void (i64, ...) 
@continuation.continue(i64 ptrtoint (ptr @async_fun to i64), i32 [[TMP8]], i64 ptrtoint (ptr @simple_await.resume.0 to i64)), !continuation.registercount [[META2]], !continuation.returnedRegistercount !2 -; CLEANUP-NEXT: unreachable -; -; -; CLEANUP-LABEL: define dso_local void @simple_await.resume.0( -; CLEANUP-SAME: i32 [[TMP0:%.*]]) !continuation.registercount [[META2]] !continuation [[META3]] { -; CLEANUP-NEXT: entryresume.0: -; CLEANUP-NEXT: [[TMP1:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CLEANUP-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -; CLEANUP-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], -24 -; CLEANUP-NEXT: store i32 [[TMP3]], ptr [[TMP1]], align 4 -; CLEANUP-NEXT: [[TMP4:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CLEANUP-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -; CLEANUP-NEXT: [[TMP6:%.*]] = inttoptr i32 [[TMP5]] to ptr addrspace(21) -; CLEANUP-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP6]], i64 0 -; CLEANUP-NEXT: [[ARG_RELOAD_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME:%.*]], ptr addrspace(21) [[TMP7]], i32 0, i32 0 -; CLEANUP-NEXT: [[ARG_RELOAD:%.*]] = load <4 x i32>, ptr addrspace(21) [[ARG_RELOAD_ADDR]], align 4 -; CLEANUP-NEXT: [[RETURNADDR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME]], ptr addrspace(21) [[TMP7]], i32 0, i32 1 -; CLEANUP-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(21) [[RETURNADDR_RELOAD_ADDR]], align 4 -; CLEANUP-NEXT: [[TMP8:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CLEANUP-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -; CLEANUP-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP9]], <4 x i32> [[ARG_RELOAD]]), !continuation.registercount [[META2]] -; CLEANUP-NEXT: unreachable -; -; -; CLEANUP-LABEL: define void @simple_await_entry( -; CLEANUP-SAME: <4 x i32> [[ARG:%.*]], ptr addrspace(1) [[MEM:%.*]]) !continuation.registercount [[META2]] !continuation.entry [[META5:![0-9]+]] !continuation [[META6:![0-9]+]] !continuation.state [[META4]] !continuation.stacksize [[META4]] { -; CLEANUP-NEXT: AllocaSpillBB: -; CLEANUP-NEXT: [[TMP0:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CLEANUP-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -; CLEANUP-NEXT: [[TMP2:%.*]] = inttoptr i32 [[TMP1]] to ptr addrspace(21) -; CLEANUP-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP2]], i64 0 -; CLEANUP-NEXT: [[MEM_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_ENTRY_FRAME:%.*]], ptr addrspace(21) [[TMP3]], i32 0, i32 1 -; CLEANUP-NEXT: store ptr addrspace(1) [[MEM]], ptr addrspace(21) [[MEM_SPILL_ADDR]], align 4 -; CLEANUP-NEXT: [[ARG_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_ENTRY_FRAME]], ptr addrspace(21) [[TMP3]], i32 0, i32 0 -; CLEANUP-NEXT: store <4 x i32> [[ARG]], ptr addrspace(21) [[ARG_SPILL_ADDR]], align 4 -; CLEANUP-NEXT: [[TMP4:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CLEANUP-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -; CLEANUP-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], 24 -; CLEANUP-NEXT: store i32 [[TMP6]], ptr [[TMP4]], align 4 -; CLEANUP-NEXT: [[TMP7:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CLEANUP-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -; CLEANUP-NEXT: call void (i64, ...) 
@continuation.continue(i64 ptrtoint (ptr @async_fun to i64), i32 [[TMP8]], i64 ptrtoint (ptr @simple_await_entry.resume.0 to i64)), !continuation.registercount [[META2]], !continuation.returnedRegistercount !2 -; CLEANUP-NEXT: unreachable -; -; -; CLEANUP-LABEL: define dso_local void @simple_await_entry.resume.0( -; CLEANUP-SAME: i32 [[TMP0:%.*]]) !continuation.registercount [[META2]] !continuation [[META6]] { -; CLEANUP-NEXT: entryresume.0: -; CLEANUP-NEXT: [[TMP1:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CLEANUP-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -; CLEANUP-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], -24 -; CLEANUP-NEXT: store i32 [[TMP3]], ptr [[TMP1]], align 4 -; CLEANUP-NEXT: [[TMP4:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CLEANUP-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -; CLEANUP-NEXT: [[TMP6:%.*]] = inttoptr i32 [[TMP5]] to ptr addrspace(21) -; CLEANUP-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP6]], i64 0 -; CLEANUP-NEXT: [[MEM_RELOAD_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_ENTRY_FRAME:%.*]], ptr addrspace(21) [[TMP7]], i32 0, i32 1 -; CLEANUP-NEXT: [[MEM_RELOAD:%.*]] = load ptr addrspace(1), ptr addrspace(21) [[MEM_RELOAD_ADDR]], align 4 -; CLEANUP-NEXT: [[ARG_RELOAD_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_ENTRY_FRAME]], ptr addrspace(21) [[TMP7]], i32 0, i32 0 -; CLEANUP-NEXT: [[ARG_RELOAD:%.*]] = load <4 x i32>, ptr addrspace(21) [[ARG_RELOAD_ADDR]], align 4 -; CLEANUP-NEXT: store <4 x i32> [[ARG_RELOAD]], ptr addrspace(1) [[MEM_RELOAD]], align 4 -; CLEANUP-NEXT: call void @continuation.complete() -; CLEANUP-NEXT: unreachable -; -; -; POST-PROCESS-LABEL: define void @simple_await( -; POST-PROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], <4 x i32> [[ARG:%.*]]) !continuation.registercount [[META2:![0-9]+]] !continuation [[META3:![0-9]+]] !continuation.state [[META4:![0-9]+]] !continuation.stacksize [[META4]] { -; POST-PROCESS-NEXT: 
AllocaSpillBB: -; POST-PROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 -; POST-PROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: [[TMP0:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: [[TMP1:%.*]] = inttoptr i32 [[TMP0]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP1]], i64 0 -; POST-PROCESS-NEXT: [[ARG_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME:%.*]], ptr addrspace(21) [[TMP2]], i32 0, i32 0 -; POST-PROCESS-NEXT: store <4 x i32> [[ARG]], ptr addrspace(21) [[ARG_SPILL_ADDR]], align 4 -; POST-PROCESS-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME]], ptr addrspace(21) [[TMP2]], i32 0, i32 1 -; POST-PROCESS-NEXT: store i64 [[RETURNADDR]], ptr addrspace(21) [[RETURNADDR_SPILL_ADDR]], align 4 -; POST-PROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 24 -; POST-PROCESS-NEXT: store i32 [[TMP4]], ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: [[TMP6:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @async_fun to i64)) -; POST-PROCESS-NEXT: [[TMP7:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @simple_await.resume.0 to i64)) -; POST-PROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[TMP6]], i32 [[TMP5]], i64 [[TMP7]]), !continuation.registercount [[META2]], !continuation.returnedRegistercount !2 -; POST-PROCESS-NEXT: unreachable -; -; -; POST-PROCESS-LABEL: define dso_local void @simple_await.resume.0( -; POST-PROCESS-SAME: i32 [[TMP0:%.*]]) !continuation.registercount [[META2]] !continuation [[META3]] { -; POST-PROCESS-NEXT: entryresume.0: -; POST-PROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 -; POST-PROCESS-NEXT: store i32 [[TMP0]], ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], -24 -; POST-PROCESS-NEXT: store i32 [[TMP2]], ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: [[TMP4:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP4]], i64 0 -; POST-PROCESS-NEXT: [[ARG_RELOAD_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME:%.*]], ptr addrspace(21) [[TMP5]], i32 0, i32 0 -; POST-PROCESS-NEXT: [[ARG_RELOAD:%.*]] = load <4 x i32>, ptr addrspace(21) [[ARG_RELOAD_ADDR]], align 4 -; POST-PROCESS-NEXT: [[RETURNADDR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME]], ptr addrspace(21) [[TMP5]], i32 0, i32 1 -; POST-PROCESS-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(21) [[RETURNADDR_RELOAD_ADDR]], align 4 -; POST-PROCESS-NEXT: [[TMP6:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP6]], <4 x i32> [[ARG_RELOAD]]), !continuation.registercount [[META2]] -; POST-PROCESS-NEXT: unreachable -; -; -; POST-PROCESS-LABEL: define void @simple_await_entry( -; POST-PROCESS-SAME: <4 x i32> [[ARG:%.*]], ptr addrspace(1) [[MEM:%.*]]) !continuation.registercount [[META2]] !continuation.entry [[META5:![0-9]+]] !continuation [[META6:![0-9]+]] !continuation.state [[META4]] !continuation.stacksize [[META4]] { -; POST-PROCESS-NEXT: AllocaSpillBB: -; POST-PROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 -; POST-PROCESS-NEXT: [[TMP0:%.*]] = call i32 @_cont_GetContinuationStackAddr() -; POST-PROCESS-NEXT: store i32 [[TMP0]], ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: [[TMP2:%.*]] = inttoptr i32 [[TMP1]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP2]], i64 0 -; POST-PROCESS-NEXT: [[MEM_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_ENTRY_FRAME:%.*]], ptr addrspace(21) [[TMP3]], i32 0, i32 1 -; POST-PROCESS-NEXT: store ptr addrspace(1) [[MEM]], ptr addrspace(21) [[MEM_SPILL_ADDR]], align 4 -; POST-PROCESS-NEXT: [[ARG_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_ENTRY_FRAME]], ptr addrspace(21) [[TMP3]], i32 0, i32 0 -; POST-PROCESS-NEXT: store <4 x i32> [[ARG]], ptr addrspace(21) [[ARG_SPILL_ADDR]], align 4 -; POST-PROCESS-NEXT: [[TMP4:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], 24 -; POST-PROCESS-NEXT: store i32 [[TMP5]], ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: [[TMP6:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: [[TMP7:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @async_fun to i64)) -; POST-PROCESS-NEXT: [[TMP8:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @simple_await_entry.resume.0 to i64)) -; POST-PROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[TMP7]], i32 [[TMP6]], i64 [[TMP8]]), !continuation.registercount [[META2]], !continuation.returnedRegistercount !2 -; POST-PROCESS-NEXT: unreachable -; -; -; POST-PROCESS-LABEL: define dso_local void @simple_await_entry.resume.0( -; POST-PROCESS-SAME: i32 [[TMP0:%.*]]) !continuation.registercount [[META2]] !continuation [[META6]] { -; POST-PROCESS-NEXT: entryresume.0: -; POST-PROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 -; POST-PROCESS-NEXT: store i32 [[TMP0]], ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], -24 -; POST-PROCESS-NEXT: store i32 [[TMP2]], ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: [[TMP4:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP4]], i64 0 -; POST-PROCESS-NEXT: [[MEM_RELOAD_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_ENTRY_FRAME:%.*]], ptr addrspace(21) [[TMP5]], i32 0, i32 1 -; POST-PROCESS-NEXT: [[MEM_RELOAD:%.*]] = load ptr addrspace(1), ptr addrspace(21) [[MEM_RELOAD_ADDR]], align 4 -; POST-PROCESS-NEXT: [[ARG_RELOAD_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_ENTRY_FRAME]], ptr addrspace(21) [[TMP5]], i32 0, i32 0 -; POST-PROCESS-NEXT: [[ARG_RELOAD:%.*]] = load <4 x i32>, ptr addrspace(21) [[ARG_RELOAD_ADDR]], align 4 -; POST-PROCESS-NEXT: store <4 x i32> [[ARG_RELOAD]], ptr addrspace(1) [[MEM_RELOAD]], align 4 -; POST-PROCESS-NEXT: call void @continuation.complete() -; POST-PROCESS-NEXT: unreachable -; diff --git a/shared/continuations/test/dx/dxil-cont-post-process.ll b/shared/continuations/test/dx/dxil-cont-post-process.ll deleted file mode 100644 index f1e2846dc0..0000000000 --- a/shared/continuations/test/dx/dxil-cont-post-process.ll +++ /dev/null @@ -1,45 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: 
--version 3 -; RUN: opt --verify-each -passes='dxil-cont-post-process,lint' -S %s 2> %t.stderr | FileCheck %s -; RUN: count 0 < %t.stderr - -target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" - -%struct.DispatchSystemData = type { i32 } - -declare i32 @continuation.initialContinuationStackPtr() -declare i32 @_cont_GetContinuationStackAddr() -declare i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData*) -declare %struct.DispatchSystemData @_cont_SetupRayGen() - -define void @RayGen(%struct.DispatchSystemData %0) !lgc.rt.shaderstage !5 !continuation.entry !0 !continuation !3 { -; CHECK-LABEL: define void @RayGen( -; CHECK-SAME: ) !lgc.rt.shaderstage [[META3:![0-9]+]] !continuation.entry [[META4:![0-9]+]] !continuation [[META5:![0-9]+]] { -; CHECK-NEXT: [[CSP:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[SYSTEM_DATA:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA:%.*]] @_cont_SetupRayGen() -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @_cont_GetContinuationStackAddr() -; CHECK-NEXT: store i32 [[TMP1]], ptr [[CSP]], align 4 -; CHECK-NEXT: ret void -; - %csp = alloca i32, align 4 - %cspInit = call i32 @continuation.initialContinuationStackPtr() - store i32 %cspInit, i32* %csp - ret void -} - -define void @RayGen.resume.0(i32 %0, %struct.DispatchSystemData %1) !lgc.rt.shaderstage !5 !continuation !3 { -; CHECK-LABEL: define void @RayGen.resume.0( -; CHECK-SAME: i32 [[TMP0:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP1:%.*]]) !lgc.rt.shaderstage [[META3]] !continuation [[META5]] { -; CHECK-NEXT: ret void -; - ret void -} - -!dx.entryPoints = !{!1} -!continuation.stackAddrspace = !{!4} - -!0 = !{} -!1 = !{void ()* @RayGen, !"RayGen", null, null, !2} -!2 = !{i32 8, i32 7} -!3 = !{void ()* @RayGen} -!4 = !{i32 21} -!5 = !{i32 0} diff --git 
a/shared/continuations/test/dx/intrinsics/cont-payload-registers-i32-count.ll b/shared/continuations/test/dx/intrinsics/cont-payload-registers-i32-count.ll deleted file mode 100644 index 4ac94794d8..0000000000 --- a/shared/continuations/test/dx/intrinsics/cont-payload-registers-i32-count.ll +++ /dev/null @@ -1,58 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 -; RUN: grep -v continuation.minPayloadRegisterCount %s | opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,save-continuation-state,lint,dxil-cont-post-process,lint,remove-types-metadata' -S 2> %t0.stderr | FileCheck -check-prefix=NOMINCOUNT %s -; RUN: count 0 < %t0.stderr -; RUN: cat %s | opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,save-continuation-state,lint,dxil-cont-post-process,lint,remove-types-metadata' -S 2> %t1.stderr | FileCheck -check-prefix=MINCOUNT %s -; RUN: count 0 < %t1.stderr - -%struct.DispatchSystemData = type { i32 } - -@debug_global = external global i32 - -declare i32 @_AmdContPayloadRegistersI32Count() - -declare %struct.DispatchSystemData @_cont_SetupRayGen() - -declare !types !9 i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData*) - -define void @main() { -; NOMINCOUNT-LABEL: define void @main( -; NOMINCOUNT-SAME: ) !continuation [[META9:![0-9]+]] !lgc.rt.shaderstage [[META5:![0-9]+]] !continuation.entry [[META10:![0-9]+]] !continuation.registercount [[META5]] !continuation.state [[META5]] { -; NOMINCOUNT-NEXT: entry: -; NOMINCOUNT-NEXT: [[SYSTEM_DATA:%.*]] = call 
[[STRUCT_DISPATCHSYSTEMDATA:%.*]] @_cont_SetupRayGen() -; NOMINCOUNT-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[SYSTEM_DATA]], 0 -; NOMINCOUNT-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) -; NOMINCOUNT-NEXT: store i32 15, ptr @debug_global, align 4 -; NOMINCOUNT-NEXT: call void @continuation.complete() -; NOMINCOUNT-NEXT: unreachable -; -; MINCOUNT-LABEL: define void @main( -; MINCOUNT-SAME: ) !continuation [[META10:![0-9]+]] !lgc.rt.shaderstage [[META5:![0-9]+]] !continuation.entry [[META11:![0-9]+]] !continuation.registercount [[META5]] !continuation.state [[META5]] { -; MINCOUNT-NEXT: entry: -; MINCOUNT-NEXT: [[SYSTEM_DATA:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA:%.*]] @_cont_SetupRayGen() -; MINCOUNT-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[SYSTEM_DATA]], 0 -; MINCOUNT-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) -; MINCOUNT-NEXT: store i32 11, ptr @debug_global, align 4 -; MINCOUNT-NEXT: call void @continuation.complete() -; MINCOUNT-NEXT: unreachable -; -entry: - %val = call i32 @_AmdContPayloadRegistersI32Count() - store i32 %val, i32* @debug_global, align 4 - ret void -} - -!dx.entryPoints = !{!0, !3} -!continuation.maxPayloadRegisterCount = !{!7} -!continuation.minPayloadRegisterCount = !{!8} - -!0 = !{null, !"", null, !1, !6} -!1 = !{!2, null, null, null} -!2 = !{!3} -!3 = !{void ()* @main, !"main", null, null, !4} -!4 = !{i32 8, i32 7, i32 6, i32 16, i32 7, i32 8, i32 5, !5} -!5 = !{i32 0} -!6 = !{i32 0, i64 65536} -!7 = !{i32 15} -!8 = !{i32 11} -!9 = !{!"function", i32 poison, !10} -!10 = !{i32 0, %struct.DispatchSystemData poison} diff --git a/shared/continuations/test/dx/lower-rt-pipeline-large-payload.ll b/shared/continuations/test/dx/lower-rt-pipeline-large-payload.ll deleted file mode 100644 index 148b7ef948..0000000000 --- a/shared/continuations/test/dx/lower-rt-pipeline-large-payload.ll +++ /dev/null @@ -1,512 +0,0 @@ -; NOTE: Assertions have been autogenerated by 
utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 3 -; Test handling of large payloads and payload spilling. -; We set the max number of payload registers to 2, so relatively small payloads need to spill already. -; This results in a bit nicer result IR, containing less "spam" copying payload fields around. -; We also set a max hit attribute size ensuring there is no need for hit attribute storage in the payload. -; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,remove-types-metadata' -S %s 2> %t0.stderr | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE %s -; RUN: count 0 < %t0.stderr -; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,save-continuation-state,lint,dxil-cont-post-process,lint,remove-types-metadata' -S %s 2> %t1.stderr | FileCheck -check-prefix=DXILCONTPOSTPROCESS %s -; RUN: count 0 < %t1.stderr - -target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" - -%dx.types.Handle = type { i8* } -%dx.types.ResourceProperties = type { i32, i32 } -; Doesn't need to spill: -%struct.SmallPayload = type { [1 x i32] } -; These two need to spill: -%struct.MediumPayload = type { [3 x i32] } -%struct.LargePayload = type { [5 x i32] } -%struct.DispatchSystemData = type { <3 x i32> } -%struct.TraversalData = type { %struct.SystemData, %struct.HitData, <3 x float>, <3 x float>, float, i64 } -%struct.SystemData = type { %struct.DispatchSystemData } -%struct.HitData = type { <3 x float>, <3 x float>, float, i32 } -%struct.AnyHitTraversalData = type { %struct.TraversalData, %struct.HitData } 
-%struct.BuiltInTriangleIntersectionAttributes = type { <2 x float> } - -@"\01?Scene@@3URaytracingAccelerationStructure@@A" = external constant %dx.types.Handle, align 4 - -declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #3 -declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) #4 -declare !types !200 void @dx.op.traceRay.struct.SmallPayload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.SmallPayload*) -declare !types !201 void @dx.op.traceRay.struct.MediumPayload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.MediumPayload*) -declare !types !202 void @dx.op.traceRay.struct.LargePayload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.LargePayload*) - -define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, float %6, float %7, float %8, float %9, float %10, float %11, float %12, float %13) #1 !types !203 { - %dis_data = load %struct.DispatchSystemData, %struct.DispatchSystemData* %data, align 4 - %sys_data = insertvalue %struct.SystemData undef, %struct.DispatchSystemData %dis_data, 0 - %trav_data = insertvalue %struct.TraversalData undef, %struct.SystemData %sys_data, 0 - %trav_data2 = insertvalue %struct.TraversalData %trav_data, i64 -1, 5 - %newdata = call %struct.DispatchSystemData @_AmdAwaitTraversal(i64 4, %struct.TraversalData %trav_data2) - store %struct.DispatchSystemData %newdata, %struct.DispatchSystemData* %data, align 4 - call void @_AmdRestoreSystemData(%struct.DispatchSystemData* %data) - ret void -} - -define void @Miss(%struct.SmallPayload* noalias nocapture %outerpayload) !types !204 !lgc.rt.attribute.size !32 { - %p1 = alloca %struct.SmallPayload - %p2 = alloca %struct.MediumPayload - %p3 = alloca 
%struct.LargePayload - ; Avoid undefs being written to payload registers - ; caused by uninitialized payloads. - store %struct.SmallPayload zeroinitializer, %struct.SmallPayload* %p1 - store %struct.MediumPayload zeroinitializer, %struct.LargePayload* %p2 - store %struct.LargePayload zeroinitializer, %struct.MediumPayload* %p3 - - %t1 = load %dx.types.Handle, %dx.types.Handle* @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 - %t2 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %t1) - %t3 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %t2, %dx.types.ResourceProperties { i32 16, i32 0 }) - - call void @dx.op.traceRay.struct.SmallPayload(i32 157, %dx.types.Handle %t3, i32 16, i32 -1, i32 0, i32 1, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0x3F50624DE0000000, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+04, %struct.SmallPayload* nonnull %p1) - call void @dx.op.traceRay.struct.MediumPayload(i32 157, %dx.types.Handle %t3, i32 16, i32 -1, i32 0, i32 1, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0x3F50624DE0000000, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+04, %struct.MediumPayload* nonnull %p2) - call void @dx.op.traceRay.struct.LargePayload(i32 157, %dx.types.Handle %t3, i32 16, i32 -1, i32 0, i32 1, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0x3F50624DE0000000, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+04, %struct.LargePayload* nonnull %p3) - ret void -} - -; Function Attrs: alwaysinline -declare %struct.DispatchSystemData @_cont_SetupRayGen() #1 - -; Function Attrs: alwaysinline -declare %struct.DispatchSystemData @_AmdAwaitTraversal(i64, %struct.TraversalData) #1 - -; Function Attrs: alwaysinline -declare %struct.DispatchSystemData @_AmdAwaitShader(i64, %struct.DispatchSystemData) #1 - -; Function 
Attrs: alwaysinline -declare %struct.AnyHitTraversalData @_AmdAwaitAnyHit(i64, %struct.AnyHitTraversalData, float, i32) #1 - -; Function Attrs: alwaysinline -declare !types !19 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData*) #1 - -; Function Attrs: alwaysinline -declare !types !21 void @_cont_SetTriangleHitAttributes(%struct.SystemData*, %struct.BuiltInTriangleIntersectionAttributes) #1 - -; Function Attrs: alwaysinline -declare !types !22 i1 @_cont_IsEndSearch(%struct.TraversalData*) #1 - -; Function Attrs: nounwind memory(read) -declare !types !24 i32 @_cont_HitKind(%struct.SystemData* nocapture readnone, %struct.HitData*) #2 - -; Function Attrs: nounwind memory(none) -declare !types !26 void @_AmdRestoreSystemData(%struct.DispatchSystemData*) #3 - -; Function Attrs: nounwind memory(none) -declare !types !28 void @_AmdRestoreSystemDataAnyHit(%struct.AnyHitTraversalData*) #3 - -; Function Attrs: alwaysinline -define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) #1 !types !30 { - ret i32 5 -} - -attributes #0 = { nounwind } -attributes #1 = { alwaysinline } -attributes #2 = { nounwind memory(read) } -attributes #3 = { nounwind memory(none) } - -!llvm.ident = !{!0} -!dx.version = !{!1} -!dx.valver = !{!1} -!dx.shaderModel = !{!2} -!dx.typeAnnotations = !{!3} -!dx.entryPoints = !{!12, !14} -!continuation.maxPayloadRegisterCount = !{!31} - -!0 = !{!"dxcoob 2019.05.00"} -!1 = !{i32 1, i32 7} -!2 = !{!"lib", i32 6, i32 7} -!3 = !{i32 1, void (%struct.SmallPayload*)* @Miss, !4} -!4 = !{!5, !7} -!5 = !{i32 1, !6, !6} -!6 = !{} -!7 = !{i32 2, !6, !6} -!9 = !{!10, !11, !11} -!10 = !{i32 0, i32 259} -!11 = !{i32 0, i32 513} -!12 = !{null, !"", null, null, !13} -!13 = !{i32 0, i64 32} -!14 = !{void (%struct.SmallPayload*)* @Miss, !"Miss", null, null, !15} -!15 = !{i32 8, i32 11, i32 6, i32 24, i32 5, !16} -!16 = !{i32 0} -!19 = !{!"function", %struct.BuiltInTriangleIntersectionAttributes poison, !20} -!20 = 
!{i32 0, %struct.SystemData poison} -!21 = !{!"function", !"void", !20, %struct.BuiltInTriangleIntersectionAttributes poison} -!22 = !{!"function", i1 poison, !23} -!23 = !{i32 0, %struct.TraversalData poison} -!24 = !{!"function", i32 poison, !20, !25} -!25 = !{i32 0, %struct.HitData poison} -!26 = !{!"function", !"void", !27} -!27 = !{i32 0, %struct.DispatchSystemData poison} -!28 = !{!"function", !"void", !29} -!29 = !{i32 0, %struct.AnyHitTraversalData poison} -!30 = !{!"function", i32 poison, !27} -!31 = !{i32 2} -!32 = !{i32 8} - -!100 = !{i32 0, %struct.SmallPayload poison} -!101 = !{i32 0, %struct.MediumPayload poison} -!102 = !{i32 0, %struct.LargePayload poison} -!103 = !{i32 0, %struct.DispatchSystemData poison} -!200 = !{!"function", !"void", i32 poison, %dx.types.Handle poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, !100} -!201 = !{!"function", !"void", i32 poison, %dx.types.Handle poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, !101} -!202 = !{!"function", !"void", i32 poison, %dx.types.Handle poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, !102} -!203 = !{!"function", !"void", !103, i64 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison} -!204 = !{!"function", !"void", !100} -; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.DispatchSystemData @Miss( -; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.attribute.size !16 !lgc.rt.shaderstage [[META17:![0-9]+]] !continuation.registercount [[META18:![0-9]+]] 
!continuation [[META19:![0-9]+]] !continuation.stacksize [[META20:![0-9]+]] { -; LOWERRAYTRACINGPIPELINE-NEXT: [[P1:%.*]] = alloca [[STRUCT_SMALLPAYLOAD:%.*]], align 8 -; LOWERRAYTRACINGPIPELINE-NEXT: [[P2:%.*]] = alloca [[STRUCT_MEDIUMPAYLOAD:%.*]], align 8 -; LOWERRAYTRACINGPIPELINE-NEXT: [[P3:%.*]] = alloca [[STRUCT_LARGEPAYLOAD:%.*]], align 8 -; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_SMALLPAYLOAD]], align 8 -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_SYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = call ptr @continuation.getContinuationStackOffset() -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], 16 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP6]], ptr [[TMP4]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_SMALLPAYLOAD]], ptr [[TMP2]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = load [1 x i32], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store [1 x i32] [[TMP8]], ptr [[TMP7]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_SMALLPAYLOAD]] zeroinitializer, ptr [[P1]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_MEDIUMPAYLOAD]] zeroinitializer, ptr [[P2]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_LARGEPAYLOAD]] zeroinitializer, ptr [[P3]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[T1:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[T2:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[T1]]) -; LOWERRAYTRACINGPIPELINE-NEXT: [[T3:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[T2]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[T3]]) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[DIS_DATA_I:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP10]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I]], 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TRAV_DATA2_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], i64 -1, 5 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_SMALLPAYLOAD]], ptr [[P1]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = load [1 x i32], ptr [[TMP11]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store [1 x i32] [[TMP12]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: 
[[TMP13:%.*]] = call ptr inttoptr (i64 4 to ptr)([[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]]), !continuation.registercount [[META18]], !continuation.returnedRegistercount !18 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] @await.struct.DispatchSystemData(ptr [[TMP13]]) -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_SMALLPAYLOAD]] poison, ptr [[P1]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_SMALLPAYLOAD]], ptr [[P1]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = load [1 x i32], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store [1 x i32] [[TMP16]], ptr [[TMP15]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP14]], ptr [[TMP10]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; LOWERRAYTRACINGPIPELINE-NEXT: br label [[DOTSPLIT10:%.*]] -; LOWERRAYTRACINGPIPELINE: .split10: -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[T3]]) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[DIS_DATA_I1:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP18]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[SYS_DATA_I2:%.*]] = insertvalue [[STRUCT_SYSTEMDATA]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I1]], 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TRAV_DATA_I3:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I2]], 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TRAV_DATA2_I4:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I3]], i64 -1, 5 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = call ptr @continuation.getContinuationStackOffset() -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = add i32 [[TMP20]], -16 -; 
LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP21]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_MEDIUMPAYLOAD]], ptr [[P2]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[TMP22]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[TMP23]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP25]], ptr getelementptr inbounds ([[STRUCT_MEDIUMPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 1), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP23]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP27]], ptr getelementptr ([[STRUCT_MEDIUMPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 2), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP23]], i64 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP29]], ptr getelementptr ([[STRUCT_MEDIUMPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 3), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP30:%.*]] = call ptr inttoptr (i64 4 to ptr)([[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I4]]), !continuation.registercount [[META14:![0-9]+]], !continuation.returnedRegistercount !14 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP31:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] @await.struct.DispatchSystemData(ptr [[TMP30]]) -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_MEDIUMPAYLOAD]] poison, ptr [[P2]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_MEDIUMPAYLOAD]], ptr [[P2]], i32 
0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP33:%.*]] = getelementptr i32, ptr [[TMP32]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP34:%.*]] = getelementptr i32, ptr [[TMP33]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP35:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_MEDIUMPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 1), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP35]], ptr [[TMP34]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP36:%.*]] = getelementptr i32, ptr [[TMP33]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP37:%.*]] = load i32, ptr getelementptr ([[STRUCT_MEDIUMPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 2), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP37]], ptr [[TMP36]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP38:%.*]] = getelementptr i32, ptr [[TMP33]], i64 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP39:%.*]] = load i32, ptr getelementptr ([[STRUCT_MEDIUMPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 3), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP39]], ptr [[TMP38]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP31]], ptr [[TMP18]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; LOWERRAYTRACINGPIPELINE-NEXT: br label [[DOTSPLIT9:%.*]] -; LOWERRAYTRACINGPIPELINE: .split9: -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP40:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[T3]]) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[DIS_DATA_I5:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP41]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[SYS_DATA_I6:%.*]] = insertvalue [[STRUCT_SYSTEMDATA]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I5]], 0 -; LOWERRAYTRACINGPIPELINE-NEXT: 
[[TRAV_DATA_I7:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I6]], 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TRAV_DATA2_I8:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I7]], i64 -1, 5 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP42:%.*]] = call ptr @continuation.getContinuationStackOffset() -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP44:%.*]] = add i32 [[TMP43]], -16 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP44]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_LARGEPAYLOAD]], ptr [[P3]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP46:%.*]] = getelementptr i32, ptr [[TMP45]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP47:%.*]] = getelementptr i32, ptr [[TMP46]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP48:%.*]] = load i32, ptr [[TMP47]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP48]], ptr getelementptr inbounds ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 1), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP49:%.*]] = getelementptr i32, ptr [[TMP46]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP50:%.*]] = load i32, ptr [[TMP49]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP50]], ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 2), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP51:%.*]] = getelementptr i32, ptr [[TMP46]], i64 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP51]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP52]], ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 3), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP53:%.*]] = getelementptr i32, 
ptr [[TMP46]], i64 3 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP54:%.*]] = load i32, ptr [[TMP53]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP54]], ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 4), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP55:%.*]] = getelementptr i32, ptr [[TMP46]], i64 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP56:%.*]] = load i32, ptr [[TMP55]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP56]], ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 5), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP57:%.*]] = call ptr inttoptr (i64 4 to ptr)([[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I8]]), !continuation.registercount [[META14]], !continuation.returnedRegistercount !14 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP58:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] @await.struct.DispatchSystemData(ptr [[TMP57]]) -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_LARGEPAYLOAD]] poison, ptr [[P3]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT_LARGEPAYLOAD]], ptr [[P3]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP60:%.*]] = getelementptr i32, ptr [[TMP59]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP61:%.*]] = getelementptr i32, ptr [[TMP60]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP62:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 1), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP62]], ptr [[TMP61]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP63:%.*]] = getelementptr i32, ptr [[TMP60]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP64:%.*]] = load i32, ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 2), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP64]], ptr [[TMP63]], align 4 -; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP65:%.*]] = getelementptr i32, ptr [[TMP60]], i64 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP66:%.*]] = load i32, ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 3), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP66]], ptr [[TMP65]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP67:%.*]] = getelementptr i32, ptr [[TMP60]], i64 3 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP68:%.*]] = load i32, ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 4), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP68]], ptr [[TMP67]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP60]], i64 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP70:%.*]] = load i32, ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 5), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP70]], ptr [[TMP69]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP58]], ptr [[TMP41]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; LOWERRAYTRACINGPIPELINE-NEXT: br label [[DOTSPLIT:%.*]] -; LOWERRAYTRACINGPIPELINE: .split: -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT_SMALLPAYLOAD]], ptr [[TMP2]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP72:%.*]] = load [1 x i32], ptr [[TMP71]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store [1 x i32] [[TMP72]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP73:%.*]] = call ptr @continuation.getContinuationStackOffset() -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP74:%.*]] = load i32, ptr [[TMP73]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP75:%.*]] = add i32 [[TMP74]], -16 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP75]], ptr [[TMP73]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP77:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP76]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP77]], !continuation.registercount [[META18]] -; -; -; LOWERRAYTRACINGPIPELINE-LABEL: define i32 @_cont_GetLocalRootIndex( -; LOWERRAYTRACINGPIPELINE-SAME: ptr [[DATA:%.*]]) #[[ATTR1:[0-9]+]] { -; LOWERRAYTRACINGPIPELINE-NEXT: ret i32 5 -; -; -; DXILCONTPOSTPROCESS-LABEL: define void @Miss( -; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.attribute.size !15 !lgc.rt.shaderstage [[META16:![0-9]+]] !continuation.registercount [[META17:![0-9]+]] !continuation [[META18:![0-9]+]] !continuation.stacksize [[META19:![0-9]+]] !continuation.state [[META20:![0-9]+]] { -; DXILCONTPOSTPROCESS-NEXT: AllocaSpillBB: -; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = inttoptr i32 [[TMP1]] to ptr addrspace(21) -; DXILCONTPOSTPROCESS-NEXT: 
[[TMP3:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP2]], i64 16 -; DXILCONTPOSTPROCESS-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[MISS_FRAME:%.*]], ptr addrspace(21) [[TMP3]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: store i64 [[RETURNADDR]], ptr addrspace(21) [[RETURNADDR_SPILL_ADDR]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 0, 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], 16 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP5]], ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [1 x i32] poison, i32 [[TMP6]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [1 x i32] [[DOTFCA_0_INSERT]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT_SPILL_ADDR:%.*]] = getelementptr inbounds [[MISS_FRAME]], ptr addrspace(21) [[TMP3]], i32 0, i32 1 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[DOTFCA_0_EXTRACT]], ptr addrspace(21) [[DOTFCA_0_EXTRACT_SPILL_ADDR]], align 4 -; DXILCONTPOSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; DXILCONTPOSTPROCESS-NEXT: [[T1:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 -; DXILCONTPOSTPROCESS-NEXT: [[T2:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[T1]]) -; DXILCONTPOSTPROCESS-NEXT: [[T3:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[T2]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) -; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[T3]]) -; DXILCONTPOSTPROCESS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison, <3 x i32> [[DOTFCA_0_0_EXTRACT]], 0 -; 
DXILCONTPOSTPROCESS-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA2_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], i64 -1, 5 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT30:%.*]] = insertvalue [1 x i32] poison, i32 0, 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT30_FCA_0_EXTRACT:%.*]] = extractvalue [1 x i32] [[DOTFCA_0_INSERT30]], 0 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[DOTFCA_0_INSERT30_FCA_0_EXTRACT]], ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 12 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP9]], ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @Miss.resume.0 to i64)) -; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 4, i32 [[TMP10]], i64 [[TMP11]], [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]]), !continuation.registercount [[META17]], !continuation.returnedRegistercount !17 -; DXILCONTPOSTPROCESS-NEXT: unreachable -; -; -; DXILCONTPOSTPROCESS-LABEL: define dso_local void @Miss.resume.0( -; DXILCONTPOSTPROCESS-SAME: i32 [[TMP0:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP1:%.*]]) !lgc.rt.attribute.size !15 !lgc.rt.shaderstage [[META16]] !continuation.registercount [[META17]] !continuation [[META18]] { -; DXILCONTPOSTPROCESS-NEXT: entryresume.0: -; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP0]], ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], -12 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP3]], ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP4]] to ptr addrspace(21) -; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP5]], i64 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [1 x i32] poison, i32 [[TMP7]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT32:%.*]] = extractvalue [1 x i32] [[DOTFCA_0_INSERT]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT13:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP1]], 0 -; DXILCONTPOSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; DXILCONTPOSTPROCESS-NEXT: [[T110:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 -; DXILCONTPOSTPROCESS-NEXT: [[T29:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[T110]]) -; DXILCONTPOSTPROCESS-NEXT: [[T38:%.*]] = call 
[[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[T29]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) -; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[T38]]) -; DXILCONTPOSTPROCESS-NEXT: [[DIS_DATA_I1_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT13]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[SYS_DATA_I2:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I1_FCA_0_INSERT]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA_I3:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I2]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA2_I4:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I3]], i64 -1, 5 -; DXILCONTPOSTPROCESS-NEXT: [[TMP9:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], -16 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP10]], ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: store i32 0, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_MEDIUMPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 1) to ptr addrspace(20)), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP11]] to ptr addrspace(21) -; DXILCONTPOSTPROCESS-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP12]], i32 -2 -; DXILCONTPOSTPROCESS-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_MEDIUMPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP13]], i32 0, i32 0, i64 2 -; DXILCONTPOSTPROCESS-NEXT: store i32 0, ptr addrspace(21) [[TMP14]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP16:%.*]] = inttoptr 
i32 [[TMP15]] to ptr addrspace(21) -; DXILCONTPOSTPROCESS-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP16]], i32 -2 -; DXILCONTPOSTPROCESS-NEXT: [[TMP18:%.*]] = getelementptr [[STRUCT_MEDIUMPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP17]], i32 0, i32 0, i64 3 -; DXILCONTPOSTPROCESS-NEXT: store i32 0, ptr addrspace(21) [[TMP18]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP19:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP20:%.*]] = add i32 [[TMP19]], 12 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP20]], ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP21:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP22:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @Miss.resume.1 to i64)) -; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 4, i32 [[TMP21]], i64 [[TMP22]], [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I4]]), !continuation.registercount [[META13:![0-9]+]], !continuation.returnedRegistercount !13 -; DXILCONTPOSTPROCESS-NEXT: unreachable -; -; -; DXILCONTPOSTPROCESS-LABEL: define dso_local void @Miss.resume.1( -; DXILCONTPOSTPROCESS-SAME: i32 [[TMP0:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP1:%.*]]) !lgc.rt.attribute.size !15 !lgc.rt.shaderstage [[META16]] !continuation.registercount [[META13]] !continuation [[META18]] { -; DXILCONTPOSTPROCESS-NEXT: entryresume.1: -; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP0]], ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], -12 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP3]], ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP4]] to ptr addrspace(21) -; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr 
addrspace(21) [[TMP5]], i64 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_MEDIUMPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 1) to ptr addrspace(20)), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP8]] to ptr addrspace(21) -; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP9]], i32 -2 -; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = getelementptr [[STRUCT_MEDIUMPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP10]], i32 0, i32 0, i64 2 -; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(21) [[TMP11]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP14:%.*]] = inttoptr i32 [[TMP13]] to ptr addrspace(21) -; DXILCONTPOSTPROCESS-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP14]], i32 -2 -; DXILCONTPOSTPROCESS-NEXT: [[TMP16:%.*]] = getelementptr [[STRUCT_MEDIUMPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP15]], i32 0, i32 0, i64 3 -; DXILCONTPOSTPROCESS-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(21) [[TMP16]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT15:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP1]], 0 -; DXILCONTPOSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; DXILCONTPOSTPROCESS-NEXT: [[T17:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 -; DXILCONTPOSTPROCESS-NEXT: [[T26:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[T17]]) -; DXILCONTPOSTPROCESS-NEXT: [[T35:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, 
[[DX_TYPES_HANDLE]] [[T26]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) -; DXILCONTPOSTPROCESS-NEXT: [[TMP18:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[T35]]) -; DXILCONTPOSTPROCESS-NEXT: [[DIS_DATA_I5_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT15]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[SYS_DATA_I6:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I5_FCA_0_INSERT]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA_I7:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I6]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA2_I8:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I7]], i64 -1, 5 -; DXILCONTPOSTPROCESS-NEXT: [[TMP19:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP20:%.*]] = add i32 [[TMP19]], -16 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP20]], ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: store i32 0, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 1) to ptr addrspace(20)), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP22:%.*]] = inttoptr i32 [[TMP21]] to ptr addrspace(21) -; DXILCONTPOSTPROCESS-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP22]], i32 -2 -; DXILCONTPOSTPROCESS-NEXT: [[TMP24:%.*]] = getelementptr [[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP23]], i32 0, i32 0, i64 2 -; DXILCONTPOSTPROCESS-NEXT: store i32 0, ptr addrspace(21) [[TMP24]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP26:%.*]] = inttoptr i32 [[TMP25]] to ptr addrspace(21) -; 
DXILCONTPOSTPROCESS-NEXT: [[TMP27:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP26]], i32 -2 -; DXILCONTPOSTPROCESS-NEXT: [[TMP28:%.*]] = getelementptr [[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP27]], i32 0, i32 0, i64 3 -; DXILCONTPOSTPROCESS-NEXT: store i32 0, ptr addrspace(21) [[TMP28]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP29:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP30:%.*]] = inttoptr i32 [[TMP29]] to ptr addrspace(21) -; DXILCONTPOSTPROCESS-NEXT: [[TMP31:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP30]], i32 -2 -; DXILCONTPOSTPROCESS-NEXT: [[TMP32:%.*]] = getelementptr [[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP31]], i32 0, i32 0, i64 4 -; DXILCONTPOSTPROCESS-NEXT: store i32 0, ptr addrspace(21) [[TMP32]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP33:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP34:%.*]] = inttoptr i32 [[TMP33]] to ptr addrspace(21) -; DXILCONTPOSTPROCESS-NEXT: [[TMP35:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP34]], i32 -2 -; DXILCONTPOSTPROCESS-NEXT: [[TMP36:%.*]] = getelementptr [[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspace(21) [[TMP35]], i32 0, i32 0, i64 5 -; DXILCONTPOSTPROCESS-NEXT: store i32 0, ptr addrspace(21) [[TMP36]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP37:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP38:%.*]] = add i32 [[TMP37]], 12 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP38]], ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP39:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP40:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @Miss.resume.2 to i64)) -; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 4, i32 [[TMP39]], i64 [[TMP40]], [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I8]]), !continuation.registercount [[META13]], !continuation.returnedRegistercount !13 -; DXILCONTPOSTPROCESS-NEXT: unreachable -; -; -; DXILCONTPOSTPROCESS-LABEL: define dso_local void @Miss.resume.2( -; DXILCONTPOSTPROCESS-SAME: i32 [[TMP0:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP1:%.*]]) !lgc.rt.attribute.size !15 !lgc.rt.shaderstage [[META16]] !continuation.registercount [[META13]] !continuation [[META18]] { -; DXILCONTPOSTPROCESS-NEXT: entryresume.2: -; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP0]], ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], -12 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP3]], ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP4]] to ptr addrspace(21) -; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP5]], i64 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 1) to ptr addrspace(20)), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP8]] to ptr addrspace(21) -; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP9]], i32 -2 -; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = getelementptr [[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP10]], i32 0, i32 0, i64 2 -; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(21) [[TMP11]], align 
4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP14:%.*]] = inttoptr i32 [[TMP13]] to ptr addrspace(21) -; DXILCONTPOSTPROCESS-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP14]], i32 -2 -; DXILCONTPOSTPROCESS-NEXT: [[TMP16:%.*]] = getelementptr [[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP15]], i32 0, i32 0, i64 3 -; DXILCONTPOSTPROCESS-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(21) [[TMP16]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP19:%.*]] = inttoptr i32 [[TMP18]] to ptr addrspace(21) -; DXILCONTPOSTPROCESS-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP19]], i32 -2 -; DXILCONTPOSTPROCESS-NEXT: [[TMP21:%.*]] = getelementptr [[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP20]], i32 0, i32 0, i64 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(21) [[TMP21]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP24:%.*]] = inttoptr i32 [[TMP23]] to ptr addrspace(21) -; DXILCONTPOSTPROCESS-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr addrspace(21) [[TMP24]], i32 -2 -; DXILCONTPOSTPROCESS-NEXT: [[TMP26:%.*]] = getelementptr [[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspace(21) [[TMP25]], i32 0, i32 0, i64 5 -; DXILCONTPOSTPROCESS-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(21) [[TMP26]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT17:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP1]], 0 -; DXILCONTPOSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT_RELOAD_ADDR:%.*]] = getelementptr inbounds [[MISS_FRAME:%.*]], ptr addrspace(21) [[TMP6]], i32 0, i32 1 -; 
DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT_RELOAD:%.*]] = load i32, ptr addrspace(21) [[DOTFCA_0_EXTRACT_RELOAD_ADDR]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[RETURNADDR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[MISS_FRAME]], ptr addrspace(21) [[TMP6]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(21) [[RETURNADDR_RELOAD_ADDR]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [1 x i32] poison, i32 [[DOTFCA_0_EXTRACT_RELOAD]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [1 x i32] [[DOTFCA_0_INSERT]], 0 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[DOTFCA_0_INSERT_FCA_0_EXTRACT]], ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP28:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP29:%.*]] = add i32 [[TMP28]], -16 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP29]], ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT12:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT17]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP30:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP30]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT12]]), !continuation.registercount [[META17]] -; DXILCONTPOSTPROCESS-NEXT: unreachable -; -; -; DXILCONTPOSTPROCESS-LABEL: define i32 @_cont_GetLocalRootIndex( -; DXILCONTPOSTPROCESS-SAME: ptr [[DATA:%.*]]) #[[ATTR1:[0-9]+]] { -; DXILCONTPOSTPROCESS-NEXT: ret i32 5 -; diff --git a/shared/continuations/test/dx/lower-rt-pipeline-simple-call-shader.ll b/shared/continuations/test/dx/lower-rt-pipeline-simple-call-shader.ll deleted file mode 100644 index 16f794a3e2..0000000000 --- a/shared/continuations/test/dx/lower-rt-pipeline-simple-call-shader.ll +++ /dev/null @@ -1,342 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 3 -; RUN: grep -v lgc.cps.module %s | opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,remove-types-metadata' -S 2> %t0.stderr | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE %s -; RUN: count 0 < %t0.stderr -; RUN: grep -v lgc.cps.module %s | opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,remove-types-metadata' \ -; RUN: -S 2> %t1.stderr | FileCheck -check-prefix=CLEANUP %s -; RUN: count 0 < %t1.stderr -; RUN: grep -v lgc.cps.module %s | opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,save-continuation-state,lint,remove-types-metadata' \ -; RUN: -S 2> %t2.stderr | FileCheck -check-prefix=SAVESTATE %s -; RUN: count 0 < %t2.stderr -; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,remove-types-metadata' -S %s 2> %t3.stderr | 
FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE-CPS %s -; RUN: count 0 < %t3.stderr -; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,cleanup-continuations,lint,remove-types-metadata' \ -; RUN: -S %s 2> %t4.stderr | FileCheck -check-prefix=CLEANUP-CPS %s -; RUN: count 0 < %t4.stderr - -target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" - -%dx.types.Handle = type { i8* } -%struct.DispatchSystemData = type { i32 } -%struct.TraversalData = type { %struct.SystemData } -%struct.SystemData = type { %struct.DispatchSystemData } -%struct.BuiltInTriangleIntersectionAttributes = type { <2 x float> } -%struct.MyParams = type { i32 } -%"class.RWTexture2D >" = type { <4 x float> } - -@"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A" = external constant %dx.types.Handle, align 4 - -declare i32 @_cont_GetContinuationStackAddr() - -declare %struct.DispatchSystemData @_cont_SetupRayGen() - -declare %struct.DispatchSystemData @_AmdAwaitTraversal(i64, %struct.TraversalData) - -declare %struct.DispatchSystemData @_AmdAwaitShader(i64, %struct.DispatchSystemData) - -declare !types !13 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData*) - -declare !types !15 void @_AmdRestoreSystemData(%struct.DispatchSystemData*) - -define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) !types !17 { - ret i32 5 -} - -; Function Attrs: nounwind memory(none) -declare !types !22 <3 x i32> @_cont_DispatchRaysIndex3(%struct.DispatchSystemData* nocapture readnone %data) #1 - -; Function Attrs: nounwind memory(none) -declare !types !22 <3 x i32> @_cont_DispatchRaysDimensions3(%struct.DispatchSystemData* nocapture readnone 
%data) #1 - -define void @_cont_CallShader(%struct.DispatchSystemData* %data, i32 %0) #1 !types !18 { - %dis_data = load %struct.DispatchSystemData, %struct.DispatchSystemData* %data, align 4 - %newdata = call %struct.DispatchSystemData @_AmdAwaitShader(i64 2, %struct.DispatchSystemData %dis_data) - store %struct.DispatchSystemData %newdata, %struct.DispatchSystemData* %data, align 4 - call void @_AmdRestoreSystemData(%struct.DispatchSystemData* %data) - ret void -} - -define void @called(%struct.MyParams* %params) !types !19 { - call void @dx.op.callShader.struct.MyParams(i32 159, i32 2, %struct.MyParams* nonnull %params) - %a = call i32 @dx.op.dispatchRaysIndex.i32(i32 145, i8 0) - %b = call i32 @dx.op.dispatchRaysDimensions.i32(i32 146, i8 0) - ret void -} - -; Function Attrs: nounwind -declare !types !21 void @dx.op.callShader.struct.MyParams(i32, i32, %struct.MyParams*) #0 - -; Function Attrs: nounwind memory(none) -declare i32 @dx.op.dispatchRaysDimensions.i32(i32, i8) #1 - -; Function Attrs: nounwind memory(none) -declare i32 @dx.op.dispatchRaysIndex.i32(i32, i8) #1 -attributes #0 = { nounwind } -attributes #1 = { alwaysinline } - -!llvm.ident = !{!0} -!dx.version = !{!1} -!dx.valver = !{!1} -!dx.shaderModel = !{!2} -!dx.entryPoints = !{!3, !6} -!lgc.cps.module = !{} - -!0 = !{!"clang version 3.7.0 (tags/RELEASE_370/final)"} -!1 = !{i32 1, i32 6} -!2 = !{!"lib", i32 6, i32 6} -!3 = !{null, !"", null, !4, !12} -!4 = !{!5, !9, null, null} -!5 = !{!6} -!6 = !{void (%struct.MyParams*)* @called, !"called", null, null, !7} -!7 = !{i32 8, i32 12, i32 6, i32 16, i32 7, i32 8, i32 5, !8} -!8 = !{i32 0} -!9 = !{!10} -!10 = !{i32 0, %"class.RWTexture2D >"* bitcast (%dx.types.Handle* @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A" to %"class.RWTexture2D >"*), !"RenderTarget", i32 0, i32 0, i32 1, i32 2, i1 false, i1 false, i1 false, !11} -!11 = !{i32 0, i32 9} -!12 = !{i32 0, i64 65536} -!13 = !{!"function", %struct.BuiltInTriangleIntersectionAttributes poison, 
!14} -!14 = !{i32 0, %struct.SystemData poison} -!15 = !{!"function", !"void", !16} -!16 = !{i32 0, %struct.DispatchSystemData poison} -!17 = !{!"function", i32 poison, !16} -!18 = !{!"function", !"void", !16, i32 poison} -!19 = !{!"function", !"void", !20} -!20 = !{i32 0, %struct.MyParams poison} -!21 = !{!"function", !"void", i32 poison, i32 poison, !20} -!22 = !{!"function", <3 x i32> poison, !16} - -; LOWERRAYTRACINGPIPELINE-LABEL: define i32 @_cont_GetLocalRootIndex( -; LOWERRAYTRACINGPIPELINE-SAME: ptr [[DATA:%.*]]) { -; LOWERRAYTRACINGPIPELINE-NEXT: ret i32 5 -; -; -; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.DispatchSystemData @called( -; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META16:![0-9]+]] !continuation.registercount [[META17:![0-9]+]] !continuation [[META18:![0-9]+]] { -; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_MYPARAMS:%.*]], align 8 -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_MYPARAMS]], ptr [[TMP2]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = load i32, ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; LOWERRAYTRACINGPIPELINE-NEXT: [[DIS_DATA_I:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_MYPARAMS]], ptr [[TMP2]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP6]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = call ptr inttoptr (i64 2 to ptr)([[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I]]), !continuation.registercount [[META17]], !continuation.returnedRegistercount !17 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] @await.struct.DispatchSystemData(ptr [[TMP7]]) -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_MYPARAMS]] poison, ptr [[TMP2]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_MYPARAMS]], ptr [[TMP2]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = load i32, ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP10]], ptr [[TMP9]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP8]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; LOWERRAYTRACINGPIPELINE-NEXT: br label [[DOTSPLIT:%.*]] -; LOWERRAYTRACINGPIPELINE: .split: -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.index() -; LOWERRAYTRACINGPIPELINE-NEXT: [[A:%.*]] = extractelement <3 x i32> [[TMP11]], i8 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.dimensions() -; LOWERRAYTRACINGPIPELINE-NEXT: [[B:%.*]] = extractelement <3 x i32> [[TMP12]], i8 0 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_MYPARAMS]], ptr [[TMP2]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP14]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP15]], !continuation.registercount [[META17]] -; -; -; CLEANUP-LABEL: define i32 @_cont_GetLocalRootIndex( -; CLEANUP-SAME: ptr [[DATA:%.*]]) { -; CLEANUP-NEXT: ret i32 5 -; -; -; CLEANUP-LABEL: define void @called( -; CLEANUP-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META16:![0-9]+]] !continuation.registercount [[META17:![0-9]+]] !continuation [[META18:![0-9]+]] !continuation.state [[META19:![0-9]+]] !continuation.stacksize [[META19]] { -; CLEANUP-NEXT: AllocaSpillBB: -; CLEANUP-NEXT: [[TMP1:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CLEANUP-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -; CLEANUP-NEXT: [[TMP3:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(21) -; CLEANUP-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP3]], i64 0 -; CLEANUP-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[CALLED_FRAME:%.*]], ptr addrspace(21) [[TMP4]], i32 0, i32 0 -; CLEANUP-NEXT: store i64 [[RETURNADDR]], ptr addrspace(21) [[RETURNADDR_SPILL_ADDR]], align 4 -; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 -; CLEANUP-NEXT: [[TMP5:%.*]] = load i32, ptr @PAYLOAD, align 4 -; CLEANUP-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; CLEANUP-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; CLEANUP-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT]], 0 -; CLEANUP-NEXT: store i32 [[TMP5]], ptr @PAYLOAD, align 4 -; CLEANUP-NEXT: [[TMP6:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CLEANUP-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -; CLEANUP-NEXT: [[TMP8:%.*]] = add i32 [[TMP7]], 8 -; CLEANUP-NEXT: store i32 [[TMP8]], ptr [[TMP6]], align 4 -; CLEANUP-NEXT: [[TMP9:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CLEANUP-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -; CLEANUP-NEXT: call void (i64, ...) @continuation.continue(i64 2, i32 [[TMP10]], i64 ptrtoint (ptr @called.resume.0 to i64), [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]]), !continuation.registercount [[META17]], !continuation.returnedRegistercount !17 -; CLEANUP-NEXT: unreachable -; -; -; CLEANUP-LABEL: define dso_local void @called.resume.0( -; CLEANUP-SAME: i32 [[TMP0:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP1:%.*]]) !lgc.rt.shaderstage [[META16]] !continuation.registercount [[META17]] !continuation [[META18]] { -; CLEANUP-NEXT: entryresume.0: -; CLEANUP-NEXT: [[TMP2:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CLEANUP-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -; CLEANUP-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], -8 -; CLEANUP-NEXT: store i32 [[TMP4]], ptr [[TMP2]], align 4 -; CLEANUP-NEXT: [[TMP5:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CLEANUP-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -; CLEANUP-NEXT: [[TMP7:%.*]] = inttoptr i32 [[TMP6]] to ptr addrspace(21) -; CLEANUP-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP7]], i64 0 -; CLEANUP-NEXT: [[TMP9:%.*]] = load i32, ptr @PAYLOAD, align 4 -; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT3:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] 
[[TMP1]], 0 -; CLEANUP-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; CLEANUP-NEXT: [[RETURNADDR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[CALLED_FRAME:%.*]], ptr addrspace(21) [[TMP8]], i32 0, i32 0 -; CLEANUP-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(21) [[RETURNADDR_RELOAD_ADDR]], align 4 -; CLEANUP-NEXT: [[TMP10:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.index() -; CLEANUP-NEXT: [[A:%.*]] = extractelement <3 x i32> [[TMP10]], i8 0 -; CLEANUP-NEXT: [[TMP11:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.dimensions() -; CLEANUP-NEXT: [[B:%.*]] = extractelement <3 x i32> [[TMP11]], i8 0 -; CLEANUP-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; CLEANUP-NEXT: store i32 [[TMP9]], ptr @PAYLOAD, align 4 -; CLEANUP-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT3]], 0 -; CLEANUP-NEXT: [[TMP12:%.*]] = call ptr @continuation.getContinuationStackOffset() -; CLEANUP-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -; CLEANUP-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP13]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META17]] -; CLEANUP-NEXT: unreachable -; -; -; SAVESTATE-LABEL: define i32 @_cont_GetLocalRootIndex( -; SAVESTATE-SAME: ptr [[DATA:%.*]]) { -; SAVESTATE-NEXT: ret i32 5 -; -; -; SAVESTATE-LABEL: define void @called( -; SAVESTATE-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META15:![0-9]+]] !continuation.registercount [[META16:![0-9]+]] !continuation [[META17:![0-9]+]] !continuation.state [[META18:![0-9]+]] !continuation.stacksize [[META18]] { -; SAVESTATE-NEXT: AllocaSpillBB: -; SAVESTATE-NEXT: [[CSP:%.*]] = alloca i32, align 4 -; SAVESTATE-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 -; SAVESTATE-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 -; SAVESTATE-NEXT: [[TMP2:%.*]] = inttoptr i32 [[TMP1]] to ptr addrspace(21) -; SAVESTATE-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP2]], i64 0 -; SAVESTATE-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[CALLED_FRAME:%.*]], ptr addrspace(21) [[TMP3]], i32 0, i32 0 -; SAVESTATE-NEXT: store i64 [[RETURNADDR]], ptr addrspace(21) [[RETURNADDR_SPILL_ADDR]], align 4 -; SAVESTATE-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 -; SAVESTATE-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 -; SAVESTATE-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr)) -; SAVESTATE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; SAVESTATE-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT]], 0 -; SAVESTATE-NEXT: store i32 [[TMP4]], ptr addrspace(20) @PAYLOAD, align 4 -; SAVESTATE-NEXT: [[TMP5:%.*]] = load i32, ptr [[CSP]], align 4 -; SAVESTATE-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], 8 -; SAVESTATE-NEXT: store i32 [[TMP6]], ptr [[CSP]], align 4 -; SAVESTATE-NEXT: [[TMP7:%.*]] = load i32, ptr [[CSP]], align 4 -; SAVESTATE-NEXT: call void (i64, ...) @continuation.continue(i64 2, i32 [[TMP7]], i64 ptrtoint (ptr @called.resume.0 to i64), [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]]), !continuation.registercount [[META16]], !continuation.returnedRegistercount !16 -; SAVESTATE-NEXT: unreachable -; -; -; SAVESTATE-LABEL: define dso_local void @called.resume.0( -; SAVESTATE-SAME: i32 [[TMP0:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP1:%.*]]) !lgc.rt.shaderstage [[META15]] !continuation.registercount [[META16]] !continuation [[META17]] { -; SAVESTATE-NEXT: entryresume.0: -; SAVESTATE-NEXT: [[CSP:%.*]] = alloca i32, align 4 -; SAVESTATE-NEXT: store i32 [[TMP0]], ptr [[CSP]], align 4 -; SAVESTATE-NEXT: [[TMP2:%.*]] = load i32, ptr [[CSP]], align 4 -; SAVESTATE-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], -8 -; SAVESTATE-NEXT: store i32 [[TMP3]], ptr [[CSP]], align 4 -; SAVESTATE-NEXT: [[TMP4:%.*]] = load i32, ptr [[CSP]], align 4 -; SAVESTATE-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP4]] to ptr addrspace(21) -; SAVESTATE-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP5]], i64 0 -; SAVESTATE-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 -; SAVESTATE-NEXT: [[DOTFCA_0_EXTRACT3:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP1]], 0 -; SAVESTATE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; SAVESTATE-NEXT: [[RETURNADDR_RELOAD_ADDR:%.*]] = 
getelementptr inbounds [[CALLED_FRAME:%.*]], ptr addrspace(21) [[TMP6]], i32 0, i32 0 -; SAVESTATE-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(21) [[RETURNADDR_RELOAD_ADDR]], align 4 -; SAVESTATE-NEXT: [[TMP8:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.index() -; SAVESTATE-NEXT: [[A:%.*]] = extractelement <3 x i32> [[TMP8]], i8 0 -; SAVESTATE-NEXT: [[TMP9:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.dimensions() -; SAVESTATE-NEXT: [[B:%.*]] = extractelement <3 x i32> [[TMP9]], i8 0 -; SAVESTATE-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr)) -; SAVESTATE-NEXT: store i32 [[TMP7]], ptr addrspace(20) @PAYLOAD, align 4 -; SAVESTATE-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT3]], 0 -; SAVESTATE-NEXT: [[TMP10:%.*]] = load i32, ptr [[CSP]], align 4 -; SAVESTATE-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP10]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META16]] -; SAVESTATE-NEXT: unreachable -; -; -; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define i32 @_cont_GetLocalRootIndex( -; LOWERRAYTRACINGPIPELINE-CPS-SAME: ptr [[DATA:%.*]]) { -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: ret i32 5 -; -; -; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define void @called( -; LOWERRAYTRACINGPIPELINE-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META16:![0-9]+]] !lgc.cps [[META17:![0-9]+]] !continuation [[META18:![0-9]+]] { -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_MYPARAMS:%.*]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void 
@amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DIS_DATA_I:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_MYPARAMS]], ptr [[TMP2]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP4]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP5:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] (...) @lgc.cps.await.s_struct.DispatchSystemDatas(i32 2, i32 2, i32 5) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_MYPARAMS]] poison, ptr [[TMP2]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_MYPARAMS]], ptr [[TMP2]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP7:%.*]] = load i32, ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP5]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br label [[DOTSPLIT:%.*]] -; LOWERRAYTRACINGPIPELINE-CPS: .split: -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP8:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.index() -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[A:%.*]] = extractelement <3 x i32> [[TMP8]], i8 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP9:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.dimensions() -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[B:%.*]] = extractelement <3 x i32> [[TMP9]], i8 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_MYPARAMS]], ptr [[TMP2]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP11]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP12:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 2, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP12]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable -; -; -; CLEANUP-CPS-LABEL: define i32 @_cont_GetLocalRootIndex( -; CLEANUP-CPS-SAME: ptr [[DATA:%.*]]) { -; CLEANUP-CPS-NEXT: ret i32 5 -; -; -; CLEANUP-CPS-LABEL: define void @called( -; CLEANUP-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META16:![0-9]+]] !lgc.cps [[META17:![0-9]+]] !continuation [[META18:![0-9]+]] { -; CLEANUP-CPS-NEXT: AllocaSpillBB: -; CLEANUP-CPS-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) -; CLEANUP-CPS-NEXT: [[RETURN_ADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[CALLED_FRAME:%.*]], ptr addrspace(32) [[TMP1]], i32 0, i32 0 -; CLEANUP-CPS-NEXT: store i32 [[RETURN_ADDR]], ptr addrspace(32) [[RETURN_ADDR_SPILL_ADDR]], align 4 -; CLEANUP-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 -; CLEANUP-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) -; CLEANUP-CPS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT]], 0 -; CLEANUP-CPS-NEXT: store i32 undef, ptr @PAYLOAD, align 4 -; CLEANUP-CPS-NEXT: [[TMP2:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference(ptr @called.resume.0) -; CLEANUP-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 2, i32 2, {} poison, i32 [[TMP2]], i32 5) -; CLEANUP-CPS-NEXT: unreachable -; -; -; CLEANUP-CPS-LABEL: define dso_local void @called.resume.0( -; CLEANUP-CPS-SAME: {} [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP3:%.*]]) !lgc.rt.shaderstage [[META16]] !lgc.cps [[META17]] !continuation [[META18]] { -; CLEANUP-CPS-NEXT: entryresume.0: -; CLEANUP-CPS-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 -; CLEANUP-CPS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP3]], ptr [[TMP4]], align 4 -; CLEANUP-CPS-NEXT: [[TMP5:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 8) -; CLEANUP-CPS-NEXT: [[TMP6:%.*]] = load i32, ptr @PAYLOAD, align 4 -; CLEANUP-CPS-NEXT: [[DOTFCA_0_EXTRACT3:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP3]], 0 -; CLEANUP-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; CLEANUP-CPS-NEXT: [[RETURN_ADDR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[CALLED_FRAME:%.*]], ptr addrspace(32) [[TMP5]], i32 0, i32 0 -; CLEANUP-CPS-NEXT: [[RETURN_ADDR_RELOAD:%.*]] = load i32, ptr addrspace(32) [[RETURN_ADDR_RELOAD_ADDR]], align 4 -; CLEANUP-CPS-NEXT: [[TMP7:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[TMP4]]) -; CLEANUP-CPS-NEXT: [[A:%.*]] = extractelement <3 x i32> [[TMP7]], i8 0 -; CLEANUP-CPS-NEXT: [[TMP8:%.*]] = call <3 x i32> @_cont_DispatchRaysDimensions3(ptr [[TMP4]]) -; CLEANUP-CPS-NEXT: [[B:%.*]] = extractelement <3 x i32> [[TMP8]], i8 0 -; CLEANUP-CPS-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; CLEANUP-CPS-NEXT: store i32 [[TMP6]], ptr @PAYLOAD, align 4 -; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT3]], 0 -; CLEANUP-CPS-NEXT: call void @lgc.cps.free(i32 8) -; CLEANUP-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR_RELOAD]], i32 2, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]) -; CLEANUP-CPS-NEXT: unreachable -; diff --git a/shared/continuations/test/dx/lower-rt-pipeline.ll b/shared/continuations/test/dx/lower-rt-pipeline.ll deleted file mode 100644 index 5bc2877d82..0000000000 --- a/shared/continuations/test/dx/lower-rt-pipeline.ll +++ /dev/null @@ -1,1565 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 3 -; RUN: grep -v lgc.cps.module %s | opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,remove-types-metadata' -S 2> %t0.stderr | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE %s -; RUN: count 0 < %t0.stderr -; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,remove-types-metadata' -S %s 2> %t1.stderr | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE-CPS %s -; RUN: count 0 < %t1.stderr - -target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" - -%dx.types.Handle = type { i8* } -%struct.DispatchSystemData = type { <3 x i32> } -%struct.TraversalData = type { %struct.SystemData, %struct.HitData, <3 x float>, <3 x float>, float, i64 } -%struct.SystemData = type { %struct.DispatchSystemData } -%struct.HitData = type { <3 x float>, <3 x float>, float, i32 } -%struct.AnyHitTraversalData = type { %struct.TraversalData, %struct.HitData } -%struct.BuiltInTriangleIntersectionAttributes = type { <2 x float> } -%struct.RayPayload = type { <4 x float> } -%dx.types.ResourceProperties = type { i32, i32 } -%struct.BuiltInTriangleIntersectionAttributes2 = type { <2 x float> } -%struct.RaytracingAccelerationStructure = type { i32 } -%"class.RWTexture2D >" = type { <4 x 
float> } - -@"\01?Scene@@3URaytracingAccelerationStructure@@A" = external constant %dx.types.Handle, align 4 -@"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A" = external constant %dx.types.Handle, align 4 - -declare i32 @_cont_GetContinuationStackAddr() #0 - -declare %struct.DispatchSystemData @_cont_SetupRayGen() #0 - -declare %struct.DispatchSystemData @_AmdAwaitTraversal(i64, %struct.TraversalData) #0 - -declare %struct.DispatchSystemData @_AmdAwaitShader(i64, %struct.DispatchSystemData) #0 - -declare %struct.AnyHitTraversalData @_AmdAwaitAnyHit(i64, %struct.AnyHitTraversalData, float, i32) #0 - -declare !types !32 %struct.HitData @_cont_GetCandidateState(%struct.AnyHitTraversalData*) #0 - -declare !types !34 %struct.HitData @_cont_GetCommittedState(%struct.SystemData*) #0 - -declare !types !36 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData*) #0 - -declare !types !37 void @_cont_SetTriangleHitAttributes(%struct.SystemData*, %struct.BuiltInTriangleIntersectionAttributes) #0 - -declare !types !38 i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData*) - -declare !types !40 i1 @_cont_IsEndSearch(%struct.TraversalData*) #0 - -declare !types !42 i32 @_cont_HitKind(%struct.SystemData*) #0 - -; Function Attrs: nounwind -declare i64 @_AmdGetResumePointAddr() #1 - -; Function Attrs: nounwind -declare !types !43 void @_AmdRestoreSystemData(%struct.DispatchSystemData*) #1 - -; Function Attrs: nounwind -declare !types !44 void @_AmdRestoreSystemDataAnyHit(%struct.AnyHitTraversalData*) #1 - -; Function Attrs: nounwind -declare !types !43 void @_cont_AcceptHitAndEndSearch(%struct.DispatchSystemData* nocapture readnone) #1 - -; Function Attrs: nounwind -declare !types !44 void @_cont_AcceptHit(%struct.AnyHitTraversalData* nocapture readnone) #1 - -; Function Attrs: nounwind -declare !types !43 void @_cont_IgnoreHit(%struct.DispatchSystemData* nocapture readnone) #1 - -; Function Attrs: nounwind -declare !types !44 
void @_AmdAcceptHitAttributes(%struct.AnyHitTraversalData* nocapture readnone) #1 - -define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, float %6, float %7, float %8, float %9, float %10, float %11, float %12, float %13) #0 !types !45 { - %dis_data = load %struct.DispatchSystemData, %struct.DispatchSystemData* %data, align 4 - %sys_data = insertvalue %struct.SystemData undef, %struct.DispatchSystemData %dis_data, 0 - %trav_data = insertvalue %struct.TraversalData undef, %struct.SystemData %sys_data, 0 - %addr = call i64 @_AmdGetResumePointAddr() #3 - %trav_data2 = insertvalue %struct.TraversalData %trav_data, i64 %addr, 5 - %newdata = call %struct.DispatchSystemData @_AmdAwaitTraversal(i64 4, %struct.TraversalData %trav_data2) - store %struct.DispatchSystemData %newdata, %struct.DispatchSystemData* %data, align 4 - call void @_AmdRestoreSystemData(%struct.DispatchSystemData* %data) - ret void -} - -define void @_cont_CallShader(%struct.DispatchSystemData* %data, i32 %0) #0 !types !46 { - %dis_data = load %struct.DispatchSystemData, %struct.DispatchSystemData* %data, align 4 - %newdata = call %struct.DispatchSystemData @_AmdAwaitShader(i64 2, %struct.DispatchSystemData %dis_data) - store %struct.DispatchSystemData %newdata, %struct.DispatchSystemData* %data, align 4 - call void @_AmdRestoreSystemData(%struct.DispatchSystemData* %data) - ret void -} - -define i1 @_cont_ReportHit(%struct.AnyHitTraversalData* %data, float %t, i32 %hitKind) #0 !types !47 { - %origTPtr = getelementptr inbounds %struct.AnyHitTraversalData, %struct.AnyHitTraversalData* %data, i32 0, i32 0, i32 4 - %origT = load float, float* %origTPtr, align 4 - %isNoHit = fcmp fast uge float %t, %origT - br i1 %isNoHit, label %isEnd, label %callAHit - -callAHit: ; preds = %0 - %trav_data = load %struct.AnyHitTraversalData, %struct.AnyHitTraversalData* %data, align 4 - %newdata = call %struct.AnyHitTraversalData @_AmdAwaitAnyHit(i64 3, 
%struct.AnyHitTraversalData %trav_data, float %t, i32 %hitKind) - store %struct.AnyHitTraversalData %newdata, %struct.AnyHitTraversalData* %data, align 4 - call void @_AmdRestoreSystemDataAnyHit(%struct.AnyHitTraversalData* %data) - ret i1 true - -isEnd: ; preds = %0 - ; Call AcceptHitAttributes, just to simulate it - call void @_AmdAcceptHitAttributes(%struct.AnyHitTraversalData* %data) - ret i1 false -} - -define <3 x i32> @_cont_DispatchRaysIndex3(%struct.DispatchSystemData* %data) !types !48 { - %resPtr.1 = getelementptr %struct.DispatchSystemData, %struct.DispatchSystemData* %data, i32 0, i32 0, i32 0 - %res.1 = load i32, i32* %resPtr.1, align 4 - %resPtr.2 = getelementptr %struct.DispatchSystemData, %struct.DispatchSystemData* %data, i32 0, i32 0, i32 1 - %res.2 = load i32, i32* %resPtr.2, align 4 - %resPtr.3 = getelementptr %struct.DispatchSystemData, %struct.DispatchSystemData* %data, i32 0, i32 0, i32 2 - %res.3 = load i32, i32* %resPtr.3, align 4 - %val.0 = insertelement <3 x i32> undef, i32 %res.1, i32 0 - %val.1 = insertelement <3 x i32> %val.0, i32 %res.2, i32 1 - %val.2 = insertelement <3 x i32> %val.1, i32 %res.3, i32 2 - ret <3 x i32> %val.2 -} - -define <3 x float> @_cont_ObjectRayOrigin3(%struct.DispatchSystemData* nocapture readnone %data, %struct.HitData* %hitData) !types !49 { - %resPtr.1 = getelementptr %struct.HitData, %struct.HitData* %hitData, i32 0, i32 0, i32 0 - %res.1 = load float, float* %resPtr.1, align 4 - %resPtr.2 = getelementptr %struct.HitData, %struct.HitData* %hitData, i32 0, i32 0, i32 1 - %res.2 = load float, float* %resPtr.2, align 4 - %resPtr.3 = getelementptr %struct.HitData, %struct.HitData* %hitData, i32 0, i32 0, i32 2 - %res.3 = load float, float* %resPtr.3, align 4 - %val.0 = insertelement <3 x float> undef, float %res.1, i32 0 - %val.1 = insertelement <3 x float> %val.0, float %res.2, i32 1 - %val.2 = insertelement <3 x float> %val.1, float %res.3, i32 2 - ret <3 x float> %val.2 -} - -define <3 x float> 
@_cont_ObjectRayDirection3(%struct.DispatchSystemData* nocapture readnone %data, %struct.HitData* %hitData) !types !49 { - %resPtr.1 = getelementptr %struct.HitData, %struct.HitData* %hitData, i32 0, i32 1, i32 0 - %res.1 = load float, float* %resPtr.1, align 4 - %resPtr.2 = getelementptr %struct.HitData, %struct.HitData* %hitData, i32 0, i32 1, i32 1 - %res.2 = load float, float* %resPtr.2, align 4 - %resPtr.3 = getelementptr %struct.HitData, %struct.HitData* %hitData, i32 0, i32 1, i32 2 - %res.3 = load float, float* %resPtr.3, align 4 - %val.0 = insertelement <3 x float> undef, float %res.1, i32 0 - %val.1 = insertelement <3 x float> %val.0, float %res.2, i32 1 - %val.2 = insertelement <3 x float> %val.1, float %res.3, i32 2 - ret <3 x float> %val.2 -} - -define float @_cont_RayTCurrent(%struct.DispatchSystemData* nocapture readnone %data, %struct.HitData* %hitData) !types !51 { - %resPtr = getelementptr %struct.HitData, %struct.HitData* %hitData, i32 0, i32 2 - %res = load float, float* %resPtr, align 4 - ret float %res -} - -; Function Attrs: nounwind -define void @MyRayGen() #2 { - %1 = load %dx.types.Handle, %dx.types.Handle* @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 - %2 = load %dx.types.Handle, %dx.types.Handle* @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 - %3 = alloca %struct.RayPayload, align 4 - %4 = bitcast %struct.RayPayload* %3 to i8* - call void @llvm.lifetime.start.p0i8(i64 16, i8* %4) #1 - %5 = getelementptr inbounds %struct.RayPayload, %struct.RayPayload* %3, i32 0, i32 0 - store <4 x float> zeroinitializer, <4 x float>* %5, align 4, !tbaa !52 - %6 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %1) - %7 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %6, %dx.types.ResourceProperties { i32 16, i32 0 }) - call void @dx.op.traceRay.struct.RayPayload(i32 157, %dx.types.Handle %7, i32 16, i32 -1, i32 0, i32 1, i32 0, float 0.000000e+00, float 
0.000000e+00, float 0.000000e+00, float 0x3F50624DE0000000, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+04, %struct.RayPayload* nonnull %3) - %8 = load <4 x float>, <4 x float>* %5, align 4, !tbaa !52 - %9 = call i32 @dx.op.dispatchRaysIndex.i32(i32 145, i8 0) - %10 = call i32 @dx.op.dispatchRaysIndex.i32(i32 145, i8 1) - %11 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %2) - %12 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %11, %dx.types.ResourceProperties { i32 4098, i32 1033 }) - %13 = extractelement <4 x float> %8, i64 0 - %14 = extractelement <4 x float> %8, i64 1 - %15 = extractelement <4 x float> %8, i64 2 - %16 = extractelement <4 x float> %8, i64 3 - call void @dx.op.textureStore.f32(i32 67, %dx.types.Handle %12, i32 %9, i32 %10, i32 undef, float %13, float %14, float %15, float %16, i8 15) - call void @llvm.lifetime.end.p0i8(i64 16, i8* %4) #1 - ret void -} - -; Function Attrs: nounwind -define void @MyClosestHitShader(%struct.RayPayload* noalias nocapture %payload, %struct.BuiltInTriangleIntersectionAttributes* nocapture readonly %attr) #2 !types !55 { - %1 = getelementptr inbounds %struct.BuiltInTriangleIntersectionAttributes, %struct.BuiltInTriangleIntersectionAttributes* %attr, i32 0, i32 0 - %2 = load <2 x float>, <2 x float>* %1, align 4 - %3 = extractelement <2 x float> %2, i32 0 - %4 = fsub fast float 1.000000e+00, %3 - %5 = extractelement <2 x float> %2, i32 1 - %6 = fsub fast float %4, %5 - %7 = insertelement <4 x float> undef, float %6, i64 0 - %8 = insertelement <4 x float> %7, float %3, i64 1 - %9 = insertelement <4 x float> %8, float %5, i64 2 - %10 = insertelement <4 x float> %9, float 1.000000e+00, i64 3 - %11 = getelementptr inbounds %struct.RayPayload, %struct.RayPayload* %payload, i32 0, i32 0 - store <4 x float> %10, <4 x float>* %11, align 4 - ret void -} - -; Function Attrs: nounwind -define void 
@MyAnyHitShader(%struct.RayPayload* noalias nocapture %payload, %struct.BuiltInTriangleIntersectionAttributes* nocapture readnone %attr) #2 !types !55 { - %1 = getelementptr inbounds %struct.RayPayload, %struct.RayPayload* %payload, i32 0, i32 0 - %2 = load <4 x float>, <4 x float>* %1, align 4 - %3 = call float @dx.op.objectRayOrigin.f32(i32 149, i8 0) - %4 = call float @dx.op.objectRayDirection.f32(i32 150, i8 0) - %5 = call float @dx.op.rayTCurrent.f32(i32 154) - %6 = fmul fast float %5, %4 - %7 = fadd fast float %6, %3 - %8 = fcmp fast ogt float %7, 0.000000e+00 - %9 = fcmp fast ogt float %7, 1.000000e+00 - %10 = fcmp fast ogt float %7, -1.000000e+00 - br i1 %8, label %11, label %14 - -11: ; preds = %0 -; acceptHitAndEndSearch - store <4 x float> %2, <4 x float>* %1, align 4 - br i1 %9, label %12, label %13 - -12: ; preds = %11 -; acceptHitAndEndSearch with unreachable - call void @dx.op.acceptHitAndEndSearch(i32 156) - unreachable - -13: ; preds = %11 -; acceptHitAndEndSearch with ret void - call void @dx.op.acceptHitAndEndSearch(i32 156) - ret void - -14: ; preds = %0 -; IgnoreHit or AcceptHit - br i1 %10, label %15, label %18 - -15: ; preds = %14 -; IgnoreHit - br i1 %9, label %16, label %17 - -16: ; preds = %15 -; IgnoreHit with unreachable - call void @dx.op.ignoreHit(i32 155) - unreachable - -17: ; preds = %15 -; IgnoreHit with ret void (as emitted by debug mode dxc) - call void @dx.op.ignoreHit(i32 155) - ret void - -18: ; preds = %14 -; AcceptHit - store <4 x float> %2, <4 x float>* %1, align 4 - ret void -} - -; Function Attrs: nounwind -define void @MyIntersectionShader() #2 { - %1 = alloca %struct.BuiltInTriangleIntersectionAttributes, align 4 - %2 = call float @dx.op.rayTCurrent.f32(i32 154) - %3 = bitcast %struct.BuiltInTriangleIntersectionAttributes* %1 to i8* - call void @llvm.lifetime.start.p0i8(i64 8, i8* %3) #1 - %4 = call i1 @dx.op.reportHit.struct.BuiltInTriangleIntersectionAttributes(i32 158, float %2, i32 0, 
%struct.BuiltInTriangleIntersectionAttributes* nonnull %1) - call void @llvm.lifetime.end.p0i8(i64 8, i8* %3) #1 - ret void -} - -; Function Attrs: nounwind -define void @MyIntersectionShader2() #2 { - %1 = alloca %struct.BuiltInTriangleIntersectionAttributes2, align 4 - %2 = call float @dx.op.rayTCurrent.f32(i32 154) - %3 = bitcast %struct.BuiltInTriangleIntersectionAttributes2* %1 to i8* - call void @llvm.lifetime.start.p0i8(i64 8, i8* %3) #1 - %4 = call i1 @dx.op.reportHit.struct.BuiltInTriangleIntersectionAttributes2(i32 158, float %2, i32 0, %struct.BuiltInTriangleIntersectionAttributes2* nonnull %1) - call void @llvm.lifetime.end.p0i8(i64 8, i8* %3) #1 - ret void -} - -; Function Attrs: nounwind -define void @MyMissShader(%struct.RayPayload* noalias nocapture %payload) #2 !types !58 { - %1 = getelementptr inbounds %struct.RayPayload, %struct.RayPayload* %payload, i32 0, i32 0 - store <4 x float> , <4 x float>* %1, align 4 - ret void -} - -; Function Attrs: nounwind -declare !types !59 void @dx.op.traceRay.struct.RayPayload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.RayPayload*) #1 - -; Function Attrs: nounwind -declare void @dx.op.textureStore.f32(i32, %dx.types.Handle, i32, i32, i32, float, float, float, float, i8) #1 - -; Function Attrs: nounwind memory(none) -declare i32 @dx.op.dispatchRaysIndex.i32(i32, i8) #3 - -; Function Attrs: nounwind memory(none) -declare float @dx.op.objectRayDirection.f32(i32, i8) #3 - -; Function Attrs: nounwind memory(none) -declare float @dx.op.objectRayOrigin.f32(i32, i8) #3 - -; Function Attrs: nounwind memory(read) -declare float @dx.op.rayTCurrent.f32(i32) #4 - -declare void @dx.op.acceptHitAndEndSearch(i32) #0 - -declare void @dx.op.ignoreHit(i32) #0 - -; Function Attrs: nounwind -declare !types !60 i1 @dx.op.reportHit.struct.BuiltInTriangleIntersectionAttributes(i32, float, i32, %struct.BuiltInTriangleIntersectionAttributes*) #1 - -; Function Attrs: 
nounwind -declare !types !61 i1 @dx.op.reportHit.struct.BuiltInTriangleIntersectionAttributes2(i32, float, i32, %struct.BuiltInTriangleIntersectionAttributes2*) #1 - -; Function Attrs: nounwind memory(none) -declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #3 - -; Function Attrs: nounwind memory(read) -declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) #4 - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) -declare !types !63 void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #5 - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) -declare !types !63 void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #5 - -attributes #0 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="0" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind } -attributes #2 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="0" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #3 = { nounwind memory(none) } -attributes #4 = { nounwind memory(read) } -attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } - -!llvm.ident = !{!0} -!dx.version = !{!1} -!dx.valver = !{!1} -!dx.shaderModel = !{!2} -!dx.resources = !{!3} -!dx.typeAnnotations = !{!10} -!dx.entryPoints = !{!18, !20, !23, !25, !27, !29, !31} -!lgc.cps.module = !{} - -!0 = !{!"clang version 3.7.0 (tags/RELEASE_370/final)"} -!1 = !{i32 1, i32 6} -!2 = !{!"lib", i32 6, i32 6} -!3 = !{!4, !7, null, null} -!4 = !{!5} -!5 = !{i32 0, %struct.RaytracingAccelerationStructure* bitcast (%dx.types.Handle* 
@"\01?Scene@@3URaytracingAccelerationStructure@@A" to %struct.RaytracingAccelerationStructure*), !"Scene", i32 0, i32 0, i32 1, i32 16, i32 0, !6} -!6 = !{i32 0, i32 4} -!7 = !{!8} -!8 = !{i32 0, %"class.RWTexture2D >"* bitcast (%dx.types.Handle* @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A" to %"class.RWTexture2D >"*), !"RenderTarget", i32 0, i32 0, i32 1, i32 2, i1 false, i1 false, i1 false, !9} -!9 = !{i32 0, i32 9} -!10 = !{i32 1, void ()* @MyRayGen, !11, void (%struct.RayPayload*, %struct.BuiltInTriangleIntersectionAttributes*)* @MyClosestHitShader, !14, void (%struct.RayPayload*, %struct.BuiltInTriangleIntersectionAttributes*)* @MyAnyHitShader, !14, void ()* @MyIntersectionShader, !11, void ()* @MyIntersectionShader2, !11, void (%struct.RayPayload*)* @MyMissShader, !17} -!11 = !{!12} -!12 = !{i32 1, !13, !13} -!13 = !{} -!14 = !{!12, !15, !16} -!15 = !{i32 2, !13, !13} -!16 = !{i32 0, !13, !13} -!17 = !{!12, !15} -!18 = !{null, !"", null, !3, !19} -!19 = !{i32 0, i64 65536} -!20 = !{void (%struct.RayPayload*, %struct.BuiltInTriangleIntersectionAttributes*)* @MyAnyHitShader, !"MyAnyHitShader", null, null, !21} -!21 = !{i32 8, i32 9, i32 6, i32 16, i32 7, i32 8, i32 5, !22} -!22 = !{i32 0} -!23 = !{void (%struct.RayPayload*, %struct.BuiltInTriangleIntersectionAttributes*)* @MyClosestHitShader, !"MyClosestHitShader", null, null, !24} -!24 = !{i32 8, i32 10, i32 6, i32 16, i32 7, i32 8, i32 5, !22} -!25 = !{void ()* @MyIntersectionShader, !"MyIntersectionShader", null, null, !26} -!26 = !{i32 8, i32 8, i32 5, !22} -!27 = !{void (%struct.RayPayload*)* @MyMissShader, !"MyMissShader", null, null, !28} -!28 = !{i32 8, i32 11, i32 6, i32 16, i32 5, !22} -!29 = !{void ()* @MyRayGen, !"MyRayGen", null, null, !30} -!30 = !{i32 8, i32 7, i32 5, !22} -!31 = !{void ()* @MyIntersectionShader2, !"MyIntersectionShader2", null, null, !26} -!32 = !{!"function", %struct.HitData poison, !33} -!33 = !{i32 0, %struct.AnyHitTraversalData poison} -!34 = !{!"function", 
%struct.HitData poison, !35} -!35 = !{i32 0, %struct.SystemData poison} -!36 = !{!"function", %struct.BuiltInTriangleIntersectionAttributes poison, !35} -!37 = !{!"function", !"void", !35, %struct.BuiltInTriangleIntersectionAttributes poison} -!38 = !{!"function", i32 poison, !39} -!39 = !{i32 0, %struct.DispatchSystemData poison} -!40 = !{!"function", i1 poison, !41} -!41 = !{i32 0, %struct.TraversalData poison} -!42 = !{!"function", i32 poison, !35} -!43 = !{!"function", !"void", !39} -!44 = !{!"function", !"void", !33} -!45 = !{!"function", !"void", !39, i64 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison} -!46 = !{!"function", !"void", !39, i32 poison} -!47 = !{!"function", i1 poison, !33, float poison, i32 poison} -!48 = !{!"function", <3 x i32> poison, !39} -!49 = !{!"function", <3 x float> poison, !39, !50} -!50 = !{i32 0, %struct.HitData poison} -!51 = !{!"function", float poison, !39, !50} -!52 = !{!53, !53, i64 0} -!53 = !{!"omnipotent char", !54, i64 0} -!54 = !{!"Simple C/C++ TBAA"} -!55 = !{!"function", !"void", !56, !57} -!56 = !{i32 0, %struct.RayPayload poison} -!57 = !{i32 0, %struct.BuiltInTriangleIntersectionAttributes poison} -!58 = !{!"function", !"void", !56} -!59 = !{!"function", !"void", i32 poison, %dx.types.Handle poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, !56} -!60 = !{!"function", i1 poison, i32 poison, float poison, i32 poison, !57} -!61 = !{!"function", i1 poison, i32 poison, float poison, i32 poison, !62} -!62 = !{i32 0, %struct.BuiltInTriangleIntersectionAttributes2 poison} -!63 = !{!"function", !"void", i64 poison, !64} -!64 = !{i32 0, i8 poison} -; LOWERRAYTRACINGPIPELINE-LABEL: define <3 x i32> @_cont_DispatchRaysIndex3( -; LOWERRAYTRACINGPIPELINE-SAME: ptr 
[[DATA:%.*]]) { -; LOWERRAYTRACINGPIPELINE-NEXT: [[RESPTR_1:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[DATA]], i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[RES_1:%.*]] = load i32, ptr [[RESPTR_1]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[RESPTR_2:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[DATA]], i32 0, i32 0, i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[RES_2:%.*]] = load i32, ptr [[RESPTR_2]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[RESPTR_3:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[DATA]], i32 0, i32 0, i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[RES_3:%.*]] = load i32, ptr [[RESPTR_3]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[VAL_0:%.*]] = insertelement <3 x i32> undef, i32 [[RES_1]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[VAL_1:%.*]] = insertelement <3 x i32> [[VAL_0]], i32 [[RES_2]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[VAL_2:%.*]] = insertelement <3 x i32> [[VAL_1]], i32 [[RES_3]], i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: ret <3 x i32> [[VAL_2]] -; -; -; LOWERRAYTRACINGPIPELINE-LABEL: define <3 x float> @_cont_ObjectRayOrigin3( -; LOWERRAYTRACINGPIPELINE-SAME: ptr nocapture readnone [[DATA:%.*]], ptr [[HITDATA:%.*]]) { -; LOWERRAYTRACINGPIPELINE-NEXT: [[RESPTR_1:%.*]] = getelementptr [[STRUCT_HITDATA:%.*]], ptr [[HITDATA]], i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[RES_1:%.*]] = load float, ptr [[RESPTR_1]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[RESPTR_2:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[HITDATA]], i32 0, i32 0, i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[RES_2:%.*]] = load float, ptr [[RESPTR_2]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[RESPTR_3:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[HITDATA]], i32 0, i32 0, i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[RES_3:%.*]] = load float, ptr [[RESPTR_3]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[VAL_0:%.*]] = insertelement <3 x float> undef, float [[RES_1]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[VAL_1:%.*]] 
= insertelement <3 x float> [[VAL_0]], float [[RES_2]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[VAL_2:%.*]] = insertelement <3 x float> [[VAL_1]], float [[RES_3]], i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: ret <3 x float> [[VAL_2]] -; -; -; LOWERRAYTRACINGPIPELINE-LABEL: define <3 x float> @_cont_ObjectRayDirection3( -; LOWERRAYTRACINGPIPELINE-SAME: ptr nocapture readnone [[DATA:%.*]], ptr [[HITDATA:%.*]]) { -; LOWERRAYTRACINGPIPELINE-NEXT: [[RESPTR_1:%.*]] = getelementptr [[STRUCT_HITDATA:%.*]], ptr [[HITDATA]], i32 0, i32 1, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[RES_1:%.*]] = load float, ptr [[RESPTR_1]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[RESPTR_2:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[HITDATA]], i32 0, i32 1, i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[RES_2:%.*]] = load float, ptr [[RESPTR_2]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[RESPTR_3:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[HITDATA]], i32 0, i32 1, i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[RES_3:%.*]] = load float, ptr [[RESPTR_3]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[VAL_0:%.*]] = insertelement <3 x float> undef, float [[RES_1]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[VAL_1:%.*]] = insertelement <3 x float> [[VAL_0]], float [[RES_2]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[VAL_2:%.*]] = insertelement <3 x float> [[VAL_1]], float [[RES_3]], i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: ret <3 x float> [[VAL_2]] -; -; -; LOWERRAYTRACINGPIPELINE-LABEL: define float @_cont_RayTCurrent( -; LOWERRAYTRACINGPIPELINE-SAME: ptr nocapture readnone [[DATA:%.*]], ptr [[HITDATA:%.*]]) { -; LOWERRAYTRACINGPIPELINE-NEXT: [[RESPTR:%.*]] = getelementptr [[STRUCT_HITDATA:%.*]], ptr [[HITDATA]], i32 0, i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[RES:%.*]] = load float, ptr [[RESPTR]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret float [[RES]] -; -; -; LOWERRAYTRACINGPIPELINE-LABEL: define void @MyRayGen( -; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) 
#[[ATTR2:[0-9]+]] !lgc.rt.shaderstage [[META23:![0-9]+]] !continuation.entry [[META14:![0-9]+]] !continuation.registercount [[META23]] !continuation [[META35:![0-9]+]] { -; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = bitcast ptr [[TMP4]] to ptr -; LOWERRAYTRACINGPIPELINE-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[TMP5]]) #[[ATTR1:[0-9]+]] -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP4]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: store <4 x float> zeroinitializer, ptr [[TMP6]], align 4, !tbaa [[TBAA36:![0-9]+]] -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP2]]) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP7]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP8]]) -; LOWERRAYTRACINGPIPELINE-NEXT: [[DIS_DATA_I:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I]], 0 -; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[ADDR_I:%.*]] = call i64 @_AmdGetResumePointAddr() #[[ATTR3:[0-9]+]] -; LOWERRAYTRACINGPIPELINE-NEXT: [[TRAV_DATA2_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], i64 [[ADDR_I]], 5 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP4]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP11]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP13]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP10]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP14]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP16]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[TMP14]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP18]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[TMP14]], i64 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP20]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = call ptr inttoptr (i64 4 to ptr)([[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]]), !continuation.registercount [[META39:![0-9]+]], !continuation.returnedRegistercount !39 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] @await.struct.DispatchSystemData(ptr [[TMP21]]) -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_RAYPAYLOAD]] poison, ptr [[TMP4]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP4]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[TMP23]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP24]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = load i32, ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = getelementptr i32, ptr [[TMP23]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP27]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP29:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP30:%.*]] = getelementptr i32, ptr [[TMP27]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP31:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP32:%.*]] = getelementptr i32, ptr [[TMP27]], i64 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP33:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; 
LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP33]], ptr [[TMP32]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP22]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) -; LOWERRAYTRACINGPIPELINE-NEXT: br label [[DOTSPLIT:%.*]] -; LOWERRAYTRACINGPIPELINE: .split: -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP34:%.*]] = load <4 x float>, ptr [[TMP6]], align 4, !tbaa [[TBAA36]] -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP35:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.index() -; LOWERRAYTRACINGPIPELINE-NEXT: [[EXTRACT:%.*]] = extractelement <3 x i32> [[TMP35]], i8 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP36:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.index() -; LOWERRAYTRACINGPIPELINE-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x i32> [[TMP36]], i8 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP37:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP3]]) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP38:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP37]], [[DX_TYPES_RESOURCEPROPERTIES]] { i32 4098, i32 1033 }) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP39:%.*]] = extractelement <4 x float> [[TMP34]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP40:%.*]] = extractelement <4 x float> [[TMP34]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP41:%.*]] = extractelement <4 x float> [[TMP34]], i64 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP42:%.*]] = extractelement <4 x float> [[TMP34]], i64 3 -; LOWERRAYTRACINGPIPELINE-NEXT: call void @dx.op.textureStore.f32(i32 67, [[DX_TYPES_HANDLE]] [[TMP38]], i32 [[EXTRACT]], i32 [[EXTRACT1]], i32 undef, float [[TMP39]], float [[TMP40]], float [[TMP41]], float [[TMP42]], i8 15) -; LOWERRAYTRACINGPIPELINE-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[TMP5]]) #[[ATTR1]] -; LOWERRAYTRACINGPIPELINE-NEXT: ret void, !continuation.registercount [[META33:![0-9]+]] -; -; -; 
LOWERRAYTRACINGPIPELINE-LABEL: define %struct.DispatchSystemData @MyClosestHitShader( -; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META40:![0-9]+]] !continuation.registercount [[META39]] !continuation [[META41:![0-9]+]] { -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 -; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 8 -; LOWERRAYTRACINGPIPELINE-NEXT: [[HITATTRS:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_SYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = call i32 @_cont_GetLocalRootIndex(ptr [[TMP4]]) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP3]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = load i32, ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP6]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: 
[[TMP13:%.*]] = getelementptr i32, ptr [[TMP10]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP14]], ptr [[TMP13]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP10]], i64 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP16]], ptr [[TMP15]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] @_cont_GetTriangleHitAttributes(ptr [[SYSTEM_DATA_ALLOCA]]) -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP17]], ptr [[TMP2]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[HITATTRS]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP20]], ptr [[TMP18]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[HITATTRS]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP23]], ptr [[TMP21]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[TMP5]]) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = getelementptr inbounds 
[[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[HITATTRS]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = load <2 x float>, ptr [[TMP24]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = extractelement <2 x float> [[TMP25]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = fsub fast float 1.000000e+00, [[TMP26]] -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = extractelement <2 x float> [[TMP25]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP29:%.*]] = fsub fast float [[TMP27]], [[TMP28]] -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP30:%.*]] = insertelement <4 x float> undef, float [[TMP29]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP31:%.*]] = insertelement <4 x float> [[TMP30]], float [[TMP26]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP32:%.*]] = insertelement <4 x float> [[TMP31]], float [[TMP28]], i64 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP33:%.*]] = insertelement <4 x float> [[TMP32]], float 1.000000e+00, i64 3 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP3]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: store <4 x float> [[TMP33]], ptr [[TMP34]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP3]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP36:%.*]] = getelementptr i32, ptr [[TMP35]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP37:%.*]] = getelementptr i32, ptr [[TMP36]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP38]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP39:%.*]] = getelementptr i32, ptr [[TMP35]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP40:%.*]] = getelementptr i32, ptr [[TMP39]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP41]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP42:%.*]] = getelementptr i32, ptr [[TMP39]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP43]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP44:%.*]] = getelementptr i32, ptr [[TMP39]], i64 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP44]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP45]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP47:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP46]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP47]], 
!continuation.registercount [[META39]] -; -; -; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.AnyHitTraversalData @MyAnyHitShader( -; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[TMP1:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META42:![0-9]+]] !continuation.registercount [[META39]] !continuation [[META43:![0-9]+]] { -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_HITDATA]], align 8 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = alloca [[STRUCT_HITDATA]], align 8 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 -; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITTRAVERSALDATA]], align 8 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 8 -; LOWERRAYTRACINGPIPELINE-NEXT: [[ORIGHITATTRS:%.*]] = alloca [8 x i32], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[HITATTRSALLOCA:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, 
i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = call i32 @_cont_GetLocalRootIndex(ptr [[TMP13]]) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP12]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP15]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[TMP16]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = load i32, ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP18]], ptr [[TMP17]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[TMP15]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[TMP19]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP21]], ptr [[TMP20]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[TMP19]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[TMP19]], i64 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] @_cont_GetTriangleHitAttributes(ptr [[TMP26]]) -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP27]], ptr [[TMP11]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP30]], ptr [[TMP28]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP33]], ptr [[TMP31]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP1]], ptr [[HITATTRSALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[TMP14]]) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP12]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP35:%.*]] = load <4 x float>, ptr [[TMP34]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP37:%.*]] = call [[STRUCT_HITDATA]] @_cont_GetCandidateState(ptr [[SYSTEM_DATA_ALLOCA]]) -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_HITDATA]] [[TMP37]], ptr [[TMP4]], align 4 
-; LOWERRAYTRACINGPIPELINE-NEXT: [[RES_1_I1:%.*]] = load float, ptr [[TMP4]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[RESPTR_2_I2:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[TMP4]], i32 0, i32 0, i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[RES_2_I3:%.*]] = load float, ptr [[RESPTR_2_I2]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[RESPTR_3_I4:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[TMP4]], i32 0, i32 0, i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[RES_3_I5:%.*]] = load float, ptr [[RESPTR_3_I4]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[VAL_0_I6:%.*]] = insertelement <3 x float> undef, float [[RES_1_I1]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[VAL_1_I7:%.*]] = insertelement <3 x float> [[VAL_0_I6]], float [[RES_2_I3]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[VAL_2_I8:%.*]] = insertelement <3 x float> [[VAL_1_I7]], float [[RES_3_I5]], i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x float> [[VAL_2_I8]], i8 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP39:%.*]] = call [[STRUCT_HITDATA]] @_cont_GetCandidateState(ptr [[SYSTEM_DATA_ALLOCA]]) -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_HITDATA]] [[TMP39]], ptr [[TMP5]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[RESPTR_1_I:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[TMP5]], i32 0, i32 1, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[RES_1_I:%.*]] = load float, ptr [[RESPTR_1_I]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[RESPTR_2_I:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[TMP5]], i32 0, i32 1, i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[RES_2_I:%.*]] = load float, ptr [[RESPTR_2_I]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[RESPTR_3_I:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[TMP5]], i32 0, i32 1, i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[RES_3_I:%.*]] = load float, ptr [[RESPTR_3_I]], align 4 -; 
LOWERRAYTRACINGPIPELINE-NEXT: [[VAL_0_I:%.*]] = insertelement <3 x float> undef, float [[RES_1_I]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[VAL_1_I:%.*]] = insertelement <3 x float> [[VAL_0_I]], float [[RES_2_I]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[VAL_2_I:%.*]] = insertelement <3 x float> [[VAL_1_I]], float [[RES_3_I]], i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[EXTRACT:%.*]] = extractelement <3 x float> [[VAL_2_I]], i8 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP41:%.*]] = call [[STRUCT_HITDATA]] @_cont_GetCandidateState(ptr [[SYSTEM_DATA_ALLOCA]]) -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_HITDATA]] [[TMP41]], ptr [[TMP3]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[RESPTR_I:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[TMP3]], i32 0, i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[RES_I:%.*]] = load float, ptr [[RESPTR_I]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP42:%.*]] = fmul fast float [[RES_I]], [[EXTRACT]] -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP43:%.*]] = fadd fast float [[TMP42]], [[EXTRACT1]] -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP44:%.*]] = fcmp fast ogt float [[TMP43]], 0.000000e+00 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP45:%.*]] = fcmp fast ogt float [[TMP43]], 1.000000e+00 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP46:%.*]] = fcmp fast ogt float [[TMP43]], -1.000000e+00 -; LOWERRAYTRACINGPIPELINE-NEXT: br i1 [[TMP44]], label [[TMP47:%.*]], label [[TMP92:%.*]] -; LOWERRAYTRACINGPIPELINE: 47: -; LOWERRAYTRACINGPIPELINE-NEXT: store <4 x float> [[TMP35]], ptr [[TMP34]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: br i1 [[TMP45]], label [[TMP48:%.*]], label [[TMP70:%.*]] -; LOWERRAYTRACINGPIPELINE: 48: -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: call void 
@_cont_AcceptHitAndEndSearch(ptr [[TMP49]]) -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP12]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP51:%.*]] = getelementptr i32, ptr [[TMP50]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP52:%.*]] = getelementptr i32, ptr [[TMP51]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP53:%.*]] = load i32, ptr [[TMP52]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP53]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP54:%.*]] = getelementptr i32, ptr [[TMP50]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP55:%.*]] = getelementptr i32, ptr [[TMP54]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP56:%.*]] = load i32, ptr [[TMP55]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP56]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP57:%.*]] = getelementptr i32, ptr [[TMP54]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP58:%.*]] = load i32, ptr [[TMP57]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP58]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP59:%.*]] = getelementptr i32, ptr [[TMP54]], i64 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP60:%.*]] = load i32, ptr [[TMP59]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP60]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP62:%.*]] = getelementptr inbounds i32, ptr 
[[TMP10]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP63:%.*]] = load i32, ptr [[TMP61]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP63]], ptr [[TMP62]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP64:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP65:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP66:%.*]] = load i32, ptr [[TMP64]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP66]], ptr [[TMP65]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP67:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP10]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: call void @_cont_SetTriangleHitAttributes(ptr [[TMP68]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP67]]) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP69:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP69]], !continuation.registercount [[META39]] -; LOWERRAYTRACINGPIPELINE: 70: -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP71]]) -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP12]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP73:%.*]] = getelementptr i32, ptr [[TMP72]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP74:%.*]] = getelementptr i32, ptr [[TMP73]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP75:%.*]] = load i32, ptr [[TMP74]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP75]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP76:%.*]] = getelementptr i32, ptr [[TMP72]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP77:%.*]] = getelementptr i32, ptr [[TMP76]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP78:%.*]] = load i32, ptr [[TMP77]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP78]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP79:%.*]] = getelementptr i32, ptr [[TMP76]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP80:%.*]] = load i32, ptr [[TMP79]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP80]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP81:%.*]] = getelementptr i32, ptr [[TMP76]], i64 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP82:%.*]] = load i32, ptr [[TMP81]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP82]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP83:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP84:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP85:%.*]] = load i32, ptr [[TMP83]], align 
4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP85]], ptr [[TMP84]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP86:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP87:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP88:%.*]] = load i32, ptr [[TMP86]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP88]], ptr [[TMP87]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP89:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP9]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP90:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: call void @_cont_SetTriangleHitAttributes(ptr [[TMP90]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP89]]) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP91:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP91]], !continuation.registercount [[META39]] -; LOWERRAYTRACINGPIPELINE: 92: -; LOWERRAYTRACINGPIPELINE-NEXT: br i1 [[TMP46]], label [[TMP93:%.*]], label [[TMP138:%.*]] -; LOWERRAYTRACINGPIPELINE: 93: -; LOWERRAYTRACINGPIPELINE-NEXT: br i1 [[TMP45]], label [[TMP94:%.*]], label [[TMP116:%.*]] -; LOWERRAYTRACINGPIPELINE: 94: -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP95:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: call void @_cont_IgnoreHit(ptr [[TMP95]]) -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP96:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP12]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP97:%.*]] = getelementptr i32, ptr [[TMP96]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP98:%.*]] = getelementptr i32, ptr [[TMP97]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP99:%.*]] = load i32, ptr [[TMP98]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP99]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP100:%.*]] = getelementptr i32, ptr [[TMP96]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP101:%.*]] = getelementptr i32, ptr [[TMP100]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP102:%.*]] = load i32, ptr [[TMP101]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP102]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP103:%.*]] = getelementptr i32, ptr [[TMP100]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP104:%.*]] = load i32, ptr [[TMP103]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP104]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP105:%.*]] = getelementptr i32, ptr [[TMP100]], i64 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP106:%.*]] = load i32, ptr [[TMP105]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP106]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP107:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP108:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP109:%.*]] = load i32, ptr [[TMP107]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 
[[TMP109]], ptr [[TMP108]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP110:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP111:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP112:%.*]] = load i32, ptr [[TMP110]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP112]], ptr [[TMP111]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP113:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP8]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP114:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: call void @_cont_SetTriangleHitAttributes(ptr [[TMP114]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP113]]) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP115:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP115]], !continuation.registercount [[META39]] -; LOWERRAYTRACINGPIPELINE: 116: -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP117:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: call void @_cont_IgnoreHit(ptr [[TMP117]]) -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP118:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP12]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP119:%.*]] = getelementptr i32, ptr [[TMP118]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP120:%.*]] = getelementptr i32, ptr [[TMP119]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP121:%.*]] = load i32, ptr [[TMP120]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP121]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP122:%.*]] = getelementptr i32, ptr [[TMP118]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP123:%.*]] = getelementptr i32, ptr [[TMP122]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP124:%.*]] = load i32, ptr [[TMP123]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP124]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP125:%.*]] = getelementptr i32, ptr [[TMP122]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP126:%.*]] = load i32, ptr [[TMP125]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP126]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP127:%.*]] = getelementptr i32, ptr [[TMP122]], i64 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP128:%.*]] = load i32, ptr [[TMP127]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP128]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP129:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP130:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP131:%.*]] = load i32, ptr [[TMP129]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 
[[TMP131]], ptr [[TMP130]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP132:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP133:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP134:%.*]] = load i32, ptr [[TMP132]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP134]], ptr [[TMP133]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP135:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP7]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP136:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: call void @_cont_SetTriangleHitAttributes(ptr [[TMP136]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP135]]) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP137:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP137]], !continuation.registercount [[META39]] -; LOWERRAYTRACINGPIPELINE: 138: -; LOWERRAYTRACINGPIPELINE-NEXT: store <4 x float> [[TMP35]], ptr [[TMP34]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void @_cont_AcceptHit(ptr [[SYSTEM_DATA_ALLOCA]]) -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP139:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP12]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP140:%.*]] = getelementptr i32, ptr [[TMP139]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP141:%.*]] = getelementptr i32, ptr [[TMP140]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP142:%.*]] = load i32, ptr [[TMP141]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP142]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP143:%.*]] = getelementptr i32, ptr [[TMP139]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP144:%.*]] = getelementptr i32, ptr [[TMP143]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP145:%.*]] = load i32, ptr [[TMP144]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP145]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP146:%.*]] = getelementptr i32, ptr [[TMP143]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP147:%.*]] = load i32, ptr [[TMP146]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP147]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP148:%.*]] = getelementptr i32, ptr [[TMP143]], i64 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP149:%.*]] = load i32, ptr [[TMP148]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP149]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP150:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP151:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: 
[[TMP152:%.*]] = load i32, ptr [[TMP150]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP152]], ptr [[TMP151]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP153:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP154:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP155:%.*]] = load i32, ptr [[TMP153]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP155]], ptr [[TMP154]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP156:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP6]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP157:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: call void @_cont_SetTriangleHitAttributes(ptr [[TMP157]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP156]]) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP158:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP158]], !continuation.registercount [[META39]] -; -; -; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.AnyHitTraversalData @MyIntersectionShader( -; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META44:![0-9]+]] !continuation.registercount [[META33]] !continuation [[META45:![0-9]+]] { -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITTRAVERSALDATA]], align 8 -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], ptr 
[[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = call i32 @_cont_GetLocalRootIndex(ptr [[TMP5]]) -; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[TMP6]]) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = call [[STRUCT_HITDATA]] @_cont_GetCandidateState(ptr [[SYSTEM_DATA_ALLOCA]]) -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_HITDATA]] [[TMP8]], ptr [[TMP2]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[RESPTR_I:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[TMP2]], i32 0, i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[RES_I:%.*]] = load float, ptr [[RESPTR_I]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = bitcast ptr [[TMP4]] to ptr -; LOWERRAYTRACINGPIPELINE-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[TMP9]]) #[[ATTR1]] -; LOWERRAYTRACINGPIPELINE-NEXT: [[ORIGTPTR_I:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[ORIGT_I:%.*]] = load float, ptr [[ORIGTPTR_I]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[ISNOHIT_I:%.*]] = fcmp fast uge float [[RES_I]], [[ORIGT_I]] -; LOWERRAYTRACINGPIPELINE-NEXT: br i1 [[ISNOHIT_I]], label [[ISEND_I:%.*]], label [[CALLAHIT_I:%.*]] -; LOWERRAYTRACINGPIPELINE: callAHit.i: -; LOWERRAYTRACINGPIPELINE-NEXT: [[TRAV_DATA_I:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP4]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = call ptr inttoptr (i64 3 to ptr)([[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I]], float 
[[RES_I]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP10]]), !continuation.registercount [[META33]], !continuation.returnedRegistercount !33 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = call [[STRUCT_ANYHITTRAVERSALDATA]] @await.struct.AnyHitTraversalData(ptr [[TMP11]]) -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP12]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[LOCAL_ROOT_INDEX:%.*]] = call i32 @_cont_GetLocalRootIndex(ptr [[TMP13]]) -; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[LOCAL_ROOT_INDEX]]) -; LOWERRAYTRACINGPIPELINE-NEXT: br label [[_CONT_REPORTHIT_EXIT:%.*]] -; LOWERRAYTRACINGPIPELINE: isEnd.i: -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP14]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP16]], ptr [[TMP15]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP17]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP3]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: call void @_cont_SetTriangleHitAttributes(ptr [[TMP21]], 
[[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP20]]) -; LOWERRAYTRACINGPIPELINE-NEXT: br label [[_CONT_REPORTHIT_EXIT]] -; LOWERRAYTRACINGPIPELINE: _cont_ReportHit.exit: -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = call i1 @_cont_IsEndSearch(ptr [[TMP22]]) -; LOWERRAYTRACINGPIPELINE-NEXT: br i1 [[TMP23]], label [[TMP24:%.*]], label [[TMP26:%.*]] -; LOWERRAYTRACINGPIPELINE: 24: -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP25]], !continuation.registercount [[META33]] -; LOWERRAYTRACINGPIPELINE: 26: -; LOWERRAYTRACINGPIPELINE-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[TMP9]]) #[[ATTR1]] -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP27]], !continuation.registercount [[META33]] -; -; -; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.AnyHitTraversalData @MyIntersectionShader2( -; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META44]] !continuation.registercount [[META33]] !continuation [[META46:![0-9]+]] { -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2:%.*]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITTRAVERSALDATA]], align 8 -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = call i32 @_cont_GetLocalRootIndex(ptr [[TMP5]]) -; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[TMP6]]) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = call [[STRUCT_HITDATA]] @_cont_GetCandidateState(ptr [[SYSTEM_DATA_ALLOCA]]) -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_HITDATA]] [[TMP8]], ptr [[TMP2]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[RESPTR_I:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[TMP2]], i32 0, i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[RES_I:%.*]] = load float, ptr [[RESPTR_I]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = bitcast ptr [[TMP4]] to ptr -; LOWERRAYTRACINGPIPELINE-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[TMP9]]) #[[ATTR1]] -; LOWERRAYTRACINGPIPELINE-NEXT: [[ORIGTPTR_I:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[ORIGT_I:%.*]] = load float, ptr [[ORIGTPTR_I]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[ISNOHIT_I:%.*]] = fcmp fast uge float [[RES_I]], [[ORIGT_I]] -; LOWERRAYTRACINGPIPELINE-NEXT: br i1 [[ISNOHIT_I]], label [[ISEND_I:%.*]], label [[CALLAHIT_I:%.*]] -; LOWERRAYTRACINGPIPELINE: callAHit.i: -; LOWERRAYTRACINGPIPELINE-NEXT: [[TRAV_DATA_I:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2]], ptr [[TMP4]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = call ptr inttoptr (i64 3 to ptr)([[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I]], float [[RES_I]], i32 0, 
[[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2]] [[TMP10]]), !continuation.registercount [[META33]], !continuation.returnedRegistercount !33 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = call [[STRUCT_ANYHITTRAVERSALDATA]] @await.struct.AnyHitTraversalData(ptr [[TMP11]]) -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP12]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[LOCAL_ROOT_INDEX:%.*]] = call i32 @_cont_GetLocalRootIndex(ptr [[TMP13]]) -; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[LOCAL_ROOT_INDEX]]) -; LOWERRAYTRACINGPIPELINE-NEXT: br label [[_CONT_REPORTHIT_EXIT:%.*]] -; LOWERRAYTRACINGPIPELINE: isEnd.i: -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP14]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP16]], ptr [[TMP15]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP17]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP3]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: call void @_cont_SetTriangleHitAttributes(ptr [[TMP21]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP20]]) 
-; LOWERRAYTRACINGPIPELINE-NEXT: br label [[_CONT_REPORTHIT_EXIT]] -; LOWERRAYTRACINGPIPELINE: _cont_ReportHit.exit: -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = call i1 @_cont_IsEndSearch(ptr [[TMP22]]) -; LOWERRAYTRACINGPIPELINE-NEXT: br i1 [[TMP23]], label [[TMP24:%.*]], label [[TMP26:%.*]] -; LOWERRAYTRACINGPIPELINE: 24: -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP25]], !continuation.registercount [[META33]] -; LOWERRAYTRACINGPIPELINE: 26: -; LOWERRAYTRACINGPIPELINE-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[TMP9]]) #[[ATTR1]] -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP27]], !continuation.registercount [[META33]] -; -; -; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.DispatchSystemData @MyMissShader( -; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META47:![0-9]+]] !continuation.registercount [[META39]] !continuation [[META48:![0-9]+]] { -; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 8 -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_SYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = call i32 @_cont_GetLocalRootIndex(ptr [[TMP3]]) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr 
[[TMP2]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP5]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = load i32, ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP8]], ptr [[TMP7]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[TMP5]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP9]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_4_MISS_IN:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP9]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_4_MISS_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP9]], i64 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_4_MISS_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[TMP4]]) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP2]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: store <4 x float> , ptr [[TMP16]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP2]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP17]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[TMP18]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP20]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[TMP17]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[TMP21]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP23]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[TMP21]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP25]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP21]], i64 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP27]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP29:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP28]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP29]], !continuation.registercount 
[[META39]] -; -; -; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define <3 x i32> @_cont_DispatchRaysIndex3( -; LOWERRAYTRACINGPIPELINE-CPS-SAME: ptr [[DATA:%.*]]) { -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_1:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[DATA]], i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_1:%.*]] = load i32, ptr [[RESPTR_1]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_2:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[DATA]], i32 0, i32 0, i32 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_2:%.*]] = load i32, ptr [[RESPTR_2]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_3:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[DATA]], i32 0, i32 0, i32 2 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_3:%.*]] = load i32, ptr [[RESPTR_3]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[VAL_0:%.*]] = insertelement <3 x i32> undef, i32 [[RES_1]], i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[VAL_1:%.*]] = insertelement <3 x i32> [[VAL_0]], i32 [[RES_2]], i32 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[VAL_2:%.*]] = insertelement <3 x i32> [[VAL_1]], i32 [[RES_3]], i32 2 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: ret <3 x i32> [[VAL_2]] -; -; -; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define <3 x float> @_cont_ObjectRayOrigin3( -; LOWERRAYTRACINGPIPELINE-CPS-SAME: ptr nocapture readnone [[DATA:%.*]], ptr [[HITDATA:%.*]]) { -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_1:%.*]] = getelementptr [[STRUCT_HITDATA:%.*]], ptr [[HITDATA]], i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_1:%.*]] = load float, ptr [[RESPTR_1]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_2:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[HITDATA]], i32 0, i32 0, i32 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_2:%.*]] = load float, ptr [[RESPTR_2]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_3:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[HITDATA]], i32 0, i32 0, i32 2 -; 
LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_3:%.*]] = load float, ptr [[RESPTR_3]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[VAL_0:%.*]] = insertelement <3 x float> undef, float [[RES_1]], i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[VAL_1:%.*]] = insertelement <3 x float> [[VAL_0]], float [[RES_2]], i32 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[VAL_2:%.*]] = insertelement <3 x float> [[VAL_1]], float [[RES_3]], i32 2 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: ret <3 x float> [[VAL_2]] -; -; -; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define <3 x float> @_cont_ObjectRayDirection3( -; LOWERRAYTRACINGPIPELINE-CPS-SAME: ptr nocapture readnone [[DATA:%.*]], ptr [[HITDATA:%.*]]) { -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_1:%.*]] = getelementptr [[STRUCT_HITDATA:%.*]], ptr [[HITDATA]], i32 0, i32 1, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_1:%.*]] = load float, ptr [[RESPTR_1]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_2:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[HITDATA]], i32 0, i32 1, i32 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_2:%.*]] = load float, ptr [[RESPTR_2]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_3:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[HITDATA]], i32 0, i32 1, i32 2 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_3:%.*]] = load float, ptr [[RESPTR_3]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[VAL_0:%.*]] = insertelement <3 x float> undef, float [[RES_1]], i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[VAL_1:%.*]] = insertelement <3 x float> [[VAL_0]], float [[RES_2]], i32 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[VAL_2:%.*]] = insertelement <3 x float> [[VAL_1]], float [[RES_3]], i32 2 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: ret <3 x float> [[VAL_2]] -; -; -; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define float @_cont_RayTCurrent( -; LOWERRAYTRACINGPIPELINE-CPS-SAME: ptr nocapture readnone [[DATA:%.*]], ptr [[HITDATA:%.*]]) { -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR:%.*]] = getelementptr [[STRUCT_HITDATA:%.*]], ptr 
[[HITDATA]], i32 0, i32 2 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES:%.*]] = load float, ptr [[RESPTR]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: ret float [[RES]] -; -; -; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define void @MyRayGen( -; LOWERRAYTRACINGPIPELINE-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] !lgc.rt.shaderstage [[META23:![0-9]+]] !lgc.cps [[META23]] !continuation [[META35:![0-9]+]] { -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP2:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP3:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP5:%.*]] = bitcast ptr [[TMP4]] to ptr -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[TMP5]]) #[[ATTR1:[0-9]+]] -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP4]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store <4 x float> zeroinitializer, ptr [[TMP6]], align 4, !tbaa [[TBAA36:![0-9]+]] -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP7:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP2]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP8:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP7]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) -; 
LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP9:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP8]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DIS_DATA_I:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I]], 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[ADDR_I:%.*]] = call i64 @_AmdGetResumePointAddr() #[[ATTR3:[0-9]+]] -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TRAV_DATA2_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], i64 [[ADDR_I]], 5 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP4]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP11]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP13]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP10]], i32 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP14]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP16]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[TMP14]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP18]], ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[TMP14]], i64 2 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP20]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP21:%.*]] = call i32 @_cont_GetLocalRootIndex(ptr [[SYSTEM_DATA_ALLOCA]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP22:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] (...) @lgc.cps.await.s_struct.DispatchSystemDatas(i32 4, i32 4, i32 [[TMP21]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_RAYPAYLOAD]] poison, ptr [[TMP4]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP4]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[TMP23]], i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP24]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP26:%.*]] = load i32, ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP27:%.*]] = getelementptr i32, ptr [[TMP23]], i32 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP27]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP29:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP30:%.*]] = getelementptr i32, ptr [[TMP27]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP31:%.*]] = load i32, ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP32:%.*]] = getelementptr i32, ptr [[TMP27]], i64 2 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP33:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP33]], ptr [[TMP32]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP22]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br label [[DOTSPLIT:%.*]] -; LOWERRAYTRACINGPIPELINE-CPS: .split: -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP34:%.*]] = load <4 x float>, ptr [[TMP6]], align 4, !tbaa [[TBAA36]] -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP35:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.index() -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[EXTRACT:%.*]] = extractelement <3 x i32> [[TMP35]], i8 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP36:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.index() -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x i32> [[TMP36]], i8 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP37:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP3]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP38:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP37]], [[DX_TYPES_RESOURCEPROPERTIES]] { i32 4098, i32 1033 }) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP39:%.*]] = extractelement <4 x float> [[TMP34]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP40:%.*]] = extractelement <4 x float> [[TMP34]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP41:%.*]] = extractelement <4 x float> [[TMP34]], i64 
2 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP42:%.*]] = extractelement <4 x float> [[TMP34]], i64 3 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @dx.op.textureStore.f32(i32 67, [[DX_TYPES_HANDLE]] [[TMP38]], i32 [[EXTRACT]], i32 [[EXTRACT1]], i32 undef, float [[TMP39]], float [[TMP40]], float [[TMP41]], float [[TMP42]], i8 15) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[TMP5]]) #[[ATTR1]] -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: ret void -; -; -; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define void @MyClosestHitShader( -; LOWERRAYTRACINGPIPELINE-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META39:![0-9]+]] !lgc.cps [[META40:![0-9]+]] !continuation [[META41:![0-9]+]] { -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[HITATTRS:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_SYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP4:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] @_cont_GetTriangleHitAttributes(ptr [[SYSTEM_DATA_ALLOCA]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP4]], ptr [[TMP2]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[HITATTRS]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 
[[TMP7]], ptr [[TMP5]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[HITATTRS]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP10]], ptr [[TMP8]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[HITATTRS]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP12:%.*]] = load <2 x float>, ptr [[TMP11]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[TMP12]], i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP14:%.*]] = fsub fast float 1.000000e+00, [[TMP13]] -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP15:%.*]] = extractelement <2 x float> [[TMP12]], i32 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP16:%.*]] = fsub fast float [[TMP14]], [[TMP15]] -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP17:%.*]] = insertelement <4 x float> undef, float [[TMP16]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP18:%.*]] = insertelement <4 x float> [[TMP17]], float [[TMP13]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP19:%.*]] = insertelement <4 x float> [[TMP18]], float [[TMP15]], i64 2 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP20:%.*]] = insertelement <4 x float> [[TMP19]], float 1.000000e+00, i64 3 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP3]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store <4 x float> [[TMP20]], ptr [[TMP21]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP3]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[TMP22]], i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[TMP23]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP25]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP22]], i32 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP27:%.*]] = getelementptr i32, ptr [[TMP26]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP28]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[TMP26]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP30]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP31:%.*]] = getelementptr i32, ptr [[TMP26]], i64 2 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP32]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP34:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP33]], align 4 -; 
LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 6, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP34]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable -; -; -; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define void @MyAnyHitShader( -; LOWERRAYTRACINGPIPELINE-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[TMP1:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META42:![0-9]+]] !lgc.cps [[META39]] !continuation [[META43:![0-9]+]] { -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_HITDATA]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP5:%.*]] = alloca [[STRUCT_HITDATA]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP6:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP7:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP8:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP9:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP10:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP11:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITTRAVERSALDATA]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP12:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[ORIGHITATTRS:%.*]] = alloca [8 x i32], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[HITATTRSALLOCA:%.*]] = alloca 
[[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP14:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] @_cont_GetTriangleHitAttributes(ptr [[TMP13]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP14]], ptr [[TMP11]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP17]], ptr [[TMP15]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP20]], ptr [[TMP18]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP1]], ptr [[HITATTRSALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP12]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP22:%.*]] = load <4 x float>, ptr [[TMP21]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; 
LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP24:%.*]] = call [[STRUCT_HITDATA]] @_cont_GetCandidateState(ptr [[SYSTEM_DATA_ALLOCA]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_HITDATA]] [[TMP24]], ptr [[TMP4]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_1_I1:%.*]] = load float, ptr [[TMP4]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_2_I2:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[TMP4]], i32 0, i32 0, i32 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_2_I3:%.*]] = load float, ptr [[RESPTR_2_I2]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_3_I4:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[TMP4]], i32 0, i32 0, i32 2 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_3_I5:%.*]] = load float, ptr [[RESPTR_3_I4]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[VAL_0_I6:%.*]] = insertelement <3 x float> undef, float [[RES_1_I1]], i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[VAL_1_I7:%.*]] = insertelement <3 x float> [[VAL_0_I6]], float [[RES_2_I3]], i32 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[VAL_2_I8:%.*]] = insertelement <3 x float> [[VAL_1_I7]], float [[RES_3_I5]], i32 2 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x float> [[VAL_2_I8]], i8 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP26:%.*]] = call [[STRUCT_HITDATA]] @_cont_GetCandidateState(ptr [[SYSTEM_DATA_ALLOCA]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_HITDATA]] [[TMP26]], ptr [[TMP5]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_1_I:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[TMP5]], i32 0, i32 1, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_1_I:%.*]] = load float, ptr [[RESPTR_1_I]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_2_I:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[TMP5]], i32 0, i32 1, i32 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: 
[[RES_2_I:%.*]] = load float, ptr [[RESPTR_2_I]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_3_I:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[TMP5]], i32 0, i32 1, i32 2 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_3_I:%.*]] = load float, ptr [[RESPTR_3_I]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[VAL_0_I:%.*]] = insertelement <3 x float> undef, float [[RES_1_I]], i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[VAL_1_I:%.*]] = insertelement <3 x float> [[VAL_0_I]], float [[RES_2_I]], i32 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[VAL_2_I:%.*]] = insertelement <3 x float> [[VAL_1_I]], float [[RES_3_I]], i32 2 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[EXTRACT:%.*]] = extractelement <3 x float> [[VAL_2_I]], i8 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP28:%.*]] = call [[STRUCT_HITDATA]] @_cont_GetCandidateState(ptr [[SYSTEM_DATA_ALLOCA]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_HITDATA]] [[TMP28]], ptr [[TMP3]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_I:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[TMP3]], i32 0, i32 2 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_I:%.*]] = load float, ptr [[RESPTR_I]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP29:%.*]] = fmul fast float [[RES_I]], [[EXTRACT]] -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP30:%.*]] = fadd fast float [[TMP29]], [[EXTRACT1]] -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP31:%.*]] = fcmp fast ogt float [[TMP30]], 0.000000e+00 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP32:%.*]] = fcmp fast ogt float [[TMP30]], 1.000000e+00 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP33:%.*]] = fcmp fast ogt float [[TMP30]], -1.000000e+00 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br i1 [[TMP31]], label [[TMP34:%.*]], label [[TMP79:%.*]] -; LOWERRAYTRACINGPIPELINE-CPS: 34: -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store <4 x float> [[TMP22]], 
ptr [[TMP21]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br i1 [[TMP32]], label [[TMP35:%.*]], label [[TMP57:%.*]] -; LOWERRAYTRACINGPIPELINE-CPS: 35: -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP36]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP12]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP38:%.*]] = getelementptr i32, ptr [[TMP37]], i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP39:%.*]] = getelementptr i32, ptr [[TMP38]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP40]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP41:%.*]] = getelementptr i32, ptr [[TMP37]], i32 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP42:%.*]] = getelementptr i32, ptr [[TMP41]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP43]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP44:%.*]] = getelementptr i32, ptr [[TMP41]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP44]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP45]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP46:%.*]] = getelementptr i32, ptr [[TMP41]], i64 2 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: 
[[TMP47:%.*]] = load i32, ptr [[TMP46]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP47]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP49:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP50:%.*]] = load i32, ptr [[TMP48]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP50]], ptr [[TMP49]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP52:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP53:%.*]] = load i32, ptr [[TMP51]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP53]], ptr [[TMP52]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP54:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP10]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @_cont_SetTriangleHitAttributes(ptr [[TMP55]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP54]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP56:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 18, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP56]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable -; LOWERRAYTRACINGPIPELINE-CPS: 57: -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP58]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP12]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP60:%.*]] = getelementptr i32, ptr [[TMP59]], i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP61:%.*]] = getelementptr i32, ptr [[TMP60]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP62:%.*]] = load i32, ptr [[TMP61]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP62]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP63:%.*]] = getelementptr i32, ptr [[TMP59]], i32 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[TMP63]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP65:%.*]] = load i32, ptr [[TMP64]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP65]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP66:%.*]] = getelementptr i32, ptr [[TMP63]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP67:%.*]] = load i32, ptr [[TMP66]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP67]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP68:%.*]] = getelementptr i32, ptr [[TMP63]], i64 2 
-; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP69:%.*]] = load i32, ptr [[TMP68]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP69]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP70:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP72:%.*]] = load i32, ptr [[TMP70]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP72]], ptr [[TMP71]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP74:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP75:%.*]] = load i32, ptr [[TMP73]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP75]], ptr [[TMP74]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP76:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP9]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @_cont_SetTriangleHitAttributes(ptr [[TMP77]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP76]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP78:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 18, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP78]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable -; LOWERRAYTRACINGPIPELINE-CPS: 79: -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br i1 [[TMP33]], label [[TMP80:%.*]], label [[TMP125:%.*]] -; LOWERRAYTRACINGPIPELINE-CPS: 80: -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br i1 [[TMP32]], label [[TMP81:%.*]], label [[TMP103:%.*]] -; LOWERRAYTRACINGPIPELINE-CPS: 81: -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP82:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @_cont_IgnoreHit(ptr [[TMP82]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP83:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP12]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP84:%.*]] = getelementptr i32, ptr [[TMP83]], i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP85:%.*]] = getelementptr i32, ptr [[TMP84]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP86:%.*]] = load i32, ptr [[TMP85]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP86]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP87:%.*]] = getelementptr i32, ptr [[TMP83]], i32 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP88:%.*]] = getelementptr i32, ptr [[TMP87]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP89:%.*]] = load i32, ptr [[TMP88]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP89]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP90:%.*]] = getelementptr i32, ptr [[TMP87]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP91:%.*]] = load i32, ptr [[TMP90]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP91]], ptr 
getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP92:%.*]] = getelementptr i32, ptr [[TMP87]], i64 2 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP93:%.*]] = load i32, ptr [[TMP92]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP93]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP94:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP95:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP96:%.*]] = load i32, ptr [[TMP94]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP96]], ptr [[TMP95]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP97:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP98:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP99:%.*]] = load i32, ptr [[TMP97]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP99]], ptr [[TMP98]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP100:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP8]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP101:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @_cont_SetTriangleHitAttributes(ptr [[TMP101]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP100]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP102:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 18, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP102]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable -; LOWERRAYTRACINGPIPELINE-CPS: 103: -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP104:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @_cont_IgnoreHit(ptr [[TMP104]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP12]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP106:%.*]] = getelementptr i32, ptr [[TMP105]], i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP107:%.*]] = getelementptr i32, ptr [[TMP106]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP108:%.*]] = load i32, ptr [[TMP107]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP108]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP109:%.*]] = getelementptr i32, ptr [[TMP105]], i32 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP110:%.*]] = getelementptr i32, ptr [[TMP109]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP111:%.*]] = load i32, ptr [[TMP110]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP111]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP112:%.*]] = getelementptr i32, ptr [[TMP109]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP113:%.*]] = load i32, ptr [[TMP112]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP113]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP114:%.*]] = getelementptr i32, ptr [[TMP109]], i64 2 -; 
LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP115:%.*]] = load i32, ptr [[TMP114]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP115]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP116:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP117:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP118:%.*]] = load i32, ptr [[TMP116]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP118]], ptr [[TMP117]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP119:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP120:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP121:%.*]] = load i32, ptr [[TMP119]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP121]], ptr [[TMP120]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP122:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP7]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP123:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @_cont_SetTriangleHitAttributes(ptr [[TMP123]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP122]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP124:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 18, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP124]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable -; LOWERRAYTRACINGPIPELINE-CPS: 125: -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store <4 x float> [[TMP22]], ptr [[TMP21]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @_cont_AcceptHit(ptr [[SYSTEM_DATA_ALLOCA]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP126:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP12]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP127:%.*]] = getelementptr i32, ptr [[TMP126]], i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP128:%.*]] = getelementptr i32, ptr [[TMP127]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP129:%.*]] = load i32, ptr [[TMP128]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP129]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP130:%.*]] = getelementptr i32, ptr [[TMP126]], i32 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP131:%.*]] = getelementptr i32, ptr [[TMP130]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP132:%.*]] = load i32, ptr [[TMP131]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP132]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP133:%.*]] = getelementptr i32, ptr [[TMP130]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP134:%.*]] = load i32, ptr [[TMP133]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP134]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP135:%.*]] = getelementptr i32, ptr [[TMP130]], i64 2 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: 
[[TMP136:%.*]] = load i32, ptr [[TMP135]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP136]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP137:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP138:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP139:%.*]] = load i32, ptr [[TMP137]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP139]], ptr [[TMP138]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP140:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP141:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP142:%.*]] = load i32, ptr [[TMP140]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP142]], ptr [[TMP141]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP143:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP6]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP144:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @_cont_SetTriangleHitAttributes(ptr [[TMP144]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP143]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP145:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 18, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP145]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable -; -; -; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define void @MyIntersectionShader( -; LOWERRAYTRACINGPIPELINE-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META40]] !lgc.cps [[META44:![0-9]+]] !continuation [[META45:![0-9]+]] { -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITTRAVERSALDATA]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP6:%.*]] = call [[STRUCT_HITDATA]] @_cont_GetCandidateState(ptr [[SYSTEM_DATA_ALLOCA]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_HITDATA]] [[TMP6]], ptr [[TMP2]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_I:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[TMP2]], i32 0, i32 2 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_I:%.*]] = load float, ptr [[RESPTR_I]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP7:%.*]] = bitcast ptr [[TMP4]] to ptr -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[TMP7]]) #[[ATTR1]] -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: 
[[ORIGTPTR_I:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[ORIGT_I:%.*]] = load float, ptr [[ORIGTPTR_I]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[ISNOHIT_I:%.*]] = fcmp fast uge float [[RES_I]], [[ORIGT_I]] -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br i1 [[ISNOHIT_I]], label [[ISEND_I:%.*]], label [[CALLAHIT_I:%.*]] -; LOWERRAYTRACINGPIPELINE-CPS: callAHit.i: -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TRAV_DATA_I:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP9:%.*]] = call i32 @_cont_GetLocalRootIndex(ptr [[TMP8]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP10:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP4]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP11:%.*]] = call [[STRUCT_ANYHITTRAVERSALDATA]] (...) 
@lgc.cps.await.s_struct.AnyHitTraversalDatas(i32 3, i32 8, i32 [[TMP9]], float [[RES_I]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP10]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP11]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[LOCAL_ROOT_INDEX:%.*]] = call i32 @_cont_GetLocalRootIndex(ptr [[TMP12]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[LOCAL_ROOT_INDEX]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br label [[_CONT_REPORTHIT_EXIT:%.*]] -; LOWERRAYTRACINGPIPELINE-CPS: isEnd.i: -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP13]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP18]], ptr [[TMP17]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP19:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP3]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @_cont_SetTriangleHitAttributes(ptr [[TMP20]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP19]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br label 
[[_CONT_REPORTHIT_EXIT]] -; LOWERRAYTRACINGPIPELINE-CPS: _cont_ReportHit.exit: -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP22:%.*]] = call i1 @_cont_IsEndSearch(ptr [[TMP21]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br i1 [[TMP22]], label [[TMP23:%.*]], label [[TMP25:%.*]] -; LOWERRAYTRACINGPIPELINE-CPS: 23: -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP24:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP24]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable -; LOWERRAYTRACINGPIPELINE-CPS: 25: -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[TMP7]]) #[[ATTR1]] -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP26:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP26]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable -; -; -; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define void @MyIntersectionShader2( -; LOWERRAYTRACINGPIPELINE-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META40]] !lgc.cps [[META44]] !continuation [[META46:![0-9]+]] { -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2:%.*]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITTRAVERSALDATA]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP6:%.*]] = call [[STRUCT_HITDATA]] @_cont_GetCandidateState(ptr [[SYSTEM_DATA_ALLOCA]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_HITDATA]] [[TMP6]], ptr [[TMP2]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_I:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[TMP2]], i32 0, i32 2 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_I:%.*]] = load float, ptr [[RESPTR_I]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP7:%.*]] = bitcast ptr [[TMP4]] to ptr -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[TMP7]]) #[[ATTR1]] -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[ORIGTPTR_I:%.*]] 
= getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[ORIGT_I:%.*]] = load float, ptr [[ORIGTPTR_I]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[ISNOHIT_I:%.*]] = fcmp fast uge float [[RES_I]], [[ORIGT_I]] -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br i1 [[ISNOHIT_I]], label [[ISEND_I:%.*]], label [[CALLAHIT_I:%.*]] -; LOWERRAYTRACINGPIPELINE-CPS: callAHit.i: -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TRAV_DATA_I:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP9:%.*]] = call i32 @_cont_GetLocalRootIndex(ptr [[TMP8]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP10:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2]], ptr [[TMP4]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP11:%.*]] = call [[STRUCT_ANYHITTRAVERSALDATA]] (...) 
@lgc.cps.await.s_struct.AnyHitTraversalDatas(i32 3, i32 8, i32 [[TMP9]], float [[RES_I]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2]] [[TMP10]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP11]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[LOCAL_ROOT_INDEX:%.*]] = call i32 @_cont_GetLocalRootIndex(ptr [[TMP12]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[LOCAL_ROOT_INDEX]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br label [[_CONT_REPORTHIT_EXIT:%.*]] -; LOWERRAYTRACINGPIPELINE-CPS: isEnd.i: -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP13]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP18]], ptr [[TMP17]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP19:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP3]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @_cont_SetTriangleHitAttributes(ptr [[TMP20]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP19]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br label 
[[_CONT_REPORTHIT_EXIT]] -; LOWERRAYTRACINGPIPELINE-CPS: _cont_ReportHit.exit: -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP22:%.*]] = call i1 @_cont_IsEndSearch(ptr [[TMP21]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br i1 [[TMP22]], label [[TMP23:%.*]], label [[TMP25:%.*]] -; LOWERRAYTRACINGPIPELINE-CPS: 23: -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP24:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP24]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable -; LOWERRAYTRACINGPIPELINE-CPS: 25: -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[TMP7]]) #[[ATTR1]] -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP26:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP26]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable -; -; -; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define void @MyMissShader( -; LOWERRAYTRACINGPIPELINE-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META44]] !lgc.cps [[META40]] !continuation [[META47:![0-9]+]] { -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_SYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP2]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store <4 x float> , ptr [[TMP3]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP2]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP5]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP7]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP4]], i32 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[TMP8]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP10]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP8]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP12]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP8]], i64 2 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP14]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP16:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP15]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 6, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP16]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable -; diff --git a/shared/continuations/test/dx/remat-intrinsic.ll b/shared/continuations/test/dx/remat-intrinsic.ll deleted file mode 100644 index 973927526b..0000000000 --- a/shared/continuations/test/dx/remat-intrinsic.ll +++ /dev/null @@ -1,202 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 3 -; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,save-continuation-state,lint,dxil-cont-post-process,lint,remove-types-metadata' \ -; RUN: -S %s 2> %t.stderr | FileCheck -check-prefix=SAVESTATE %s -; RUN: count 0 < %t.stderr - -target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" - -%dx.types.Handle = type { i8* } -%struct.DispatchSystemData = type { i32 } -%struct.TraversalData = type { %struct.SystemData } -%struct.SystemData = type { %struct.DispatchSystemData } -%struct.BuiltInTriangleIntersectionAttributes = type { <2 x float> } -%struct.MyParams = type { i32 } -%dx.types.fouri32 = type { i32, i32, i32, i32 } -%dx.types.ResourceProperties = type { i32, i32 } -%"class.RWTexture2D >" = type { <4 x float> } - -@"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A" = external constant %dx.types.Handle, align 4 - -declare i32 @_cont_GetContinuationStackAddr() - -declare %struct.DispatchSystemData @_cont_SetupRayGen() - -declare %struct.DispatchSystemData @_AmdAwaitTraversal(i64, %struct.TraversalData) - -declare 
%struct.DispatchSystemData @_AmdAwaitShader(i64, %struct.DispatchSystemData) - -declare !types !14 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData*) - -; Function Attrs: nounwind memory(none) -declare !types !16 <3 x i32> @_cont_DispatchRaysIndex3(%struct.DispatchSystemData* nocapture readnone) #0 - -; Function Attrs: nounwind memory(none) -declare !types !18 void @_AmdRestoreSystemData(%struct.DispatchSystemData*) #0 - -define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) !types !19 { - ret i32 5 -} - -define void @_cont_CallShader(%struct.DispatchSystemData* %data, i32 %0) !types !20 { - %dis_data = load %struct.DispatchSystemData, %struct.DispatchSystemData* %data, align 4 - %newdata = call %struct.DispatchSystemData @_AmdAwaitShader(i64 2, %struct.DispatchSystemData %dis_data) - store %struct.DispatchSystemData %newdata, %struct.DispatchSystemData* %data, align 4 - call void @_AmdRestoreSystemData(%struct.DispatchSystemData* %data) - ret void -} - -define void @called(%struct.MyParams* %params) !types !21 { - %i = call i32 @dx.op.dispatchRaysIndex.i32(i32 145, i8 0) - %unpacked = call %dx.types.fouri32 @dx.op.unpack4x8.i32(i32 219, i8 1, i32 %i) - %params_i = getelementptr %struct.MyParams, %struct.MyParams* %params, i32 0, i32 0 - %handle0 = load %dx.types.Handle, %dx.types.Handle* @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 - %handle1 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %handle0) - %handle2 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %handle1, %dx.types.ResourceProperties { i32 16, i32 0 }) - call void @dx.op.callShader.struct.MyParams(i32 159, i32 2, %struct.MyParams* nonnull %params) - %a = extractvalue %dx.types.fouri32 %unpacked, 0 - %b = extractvalue %dx.types.fouri32 %unpacked, 1 - %c = extractvalue %dx.types.fouri32 %unpacked, 2 - %d = extractvalue %dx.types.fouri32 %unpacked, 3 - 
%packed = call i32 @dx.op.pack4x8.i32(i32 220, i8 0, i32 %a, i32 %b, i32 %c, i32 %d) - call void @dx.op.textureStore.f32(i32 67, %dx.types.Handle %handle2, i32 0, i32 0, i32 undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 1.000000e+00, i8 15) - store i32 %packed, i32* %params_i, align 4 - ret void -} - -; Function Attrs: nounwind -declare !types !23 void @dx.op.callShader.struct.MyParams(i32, i32, %struct.MyParams*) #1 - -; Function Attrs: nounwind memory(none) -declare i32 @dx.op.dispatchRaysIndex.i32(i32, i8) #0 - -; Function Attrs: nounwind memory(none) -declare %dx.types.fouri32 @dx.op.unpack4x8.i32(i32, i8, i32) #0 - -; Function Attrs: nounwind memory(none) -declare i32 @dx.op.pack4x8.i32(i32, i8, i32, i32, i32, i32) #0 - -; Function Attrs: nounwind memory(none) -declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) #0 - -; Function Attrs: nounwind memory(none) -declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #0 - -; Function Attrs: nounwind -declare void @dx.op.textureStore.f32(i32, %dx.types.Handle, i32, i32, i32, float, float, float, float, i8) #1 - -attributes #0 = { nounwind memory(none) } -attributes #1 = { nounwind } - -!llvm.ident = !{!0} -!dx.version = !{!1} -!dx.valver = !{!1} -!dx.shaderModel = !{!2} -!dx.entryPoints = !{!3, !6} -!continuation.maxPayloadRegisterCount = !{!13} - -!0 = !{!"clang version 3.7.0 (tags/RELEASE_370/final)"} -!1 = !{i32 1, i32 6} -!2 = !{!"lib", i32 6, i32 6} -!3 = !{null, !"", null, !4, !12} -!4 = !{!5, !9, null, null} -!5 = !{!6} -!6 = !{void (%struct.MyParams*)* @called, !"called", null, null, !7} -!7 = !{i32 8, i32 12, i32 6, i32 16, i32 7, i32 8, i32 5, !8} -!8 = !{i32 0} -!9 = !{!10} -!10 = !{i32 0, %"class.RWTexture2D >"* bitcast (%dx.types.Handle* @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A" to %"class.RWTexture2D >"*), !"RenderTarget", i32 0, i32 0, i32 1, i32 2, i1 false, i1 false, i1 false, 
!11} -!11 = !{i32 0, i32 9} -!12 = !{i32 0, i64 65536} -!13 = !{i32 30} -!14 = !{!"function", %struct.BuiltInTriangleIntersectionAttributes poison, !15} -!15 = !{i32 0, %struct.SystemData poison} -!16 = !{!"function", <3 x i32> poison, !17} -!17 = !{i32 0, %struct.DispatchSystemData poison} -!18 = !{!"function", !"void", !17} -!19 = !{!"function", i32 poison, !17} -!20 = !{!"function", !"void", !17, i32 poison} -!21 = !{!"function", !"void", !22} -!22 = !{i32 0, %struct.MyParams poison} -!23 = !{!"function", !"void", i32 poison, i32 poison, !22} -; SAVESTATE-LABEL: define i32 @_cont_GetLocalRootIndex( -; SAVESTATE-SAME: ptr [[DATA:%.*]]) #[[ATTR1:[0-9]+]] { -; SAVESTATE-NEXT: ret i32 5 -; -; -; SAVESTATE-LABEL: define void @called( -; SAVESTATE-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !continuation [[META15:![0-9]+]] !lgc.rt.shaderstage [[META16:![0-9]+]] !continuation.registercount [[META17:![0-9]+]] !continuation.state [[META18:![0-9]+]] !continuation.stacksize [[META18]] { -; SAVESTATE-NEXT: AllocaSpillBB: -; SAVESTATE-NEXT: [[CSP:%.*]] = alloca i32, align 4 -; SAVESTATE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 -; SAVESTATE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; SAVESTATE-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 -; SAVESTATE-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 -; SAVESTATE-NEXT: [[TMP2:%.*]] = inttoptr i32 [[TMP1]] to ptr addrspace(21) -; SAVESTATE-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP2]], i64 0 -; SAVESTATE-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[CALLED_FRAME:%.*]], ptr addrspace(21) [[TMP3]], i32 0, i32 0 -; SAVESTATE-NEXT: store i64 [[RETURNADDR]], ptr addrspace(21) [[RETURNADDR_SPILL_ADDR]], align 4 -; SAVESTATE-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 -; SAVESTATE-NEXT: [[TMP4:%.*]] = load 
i32, ptr addrspace(20) @REGISTERS, align 4 -; SAVESTATE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; SAVESTATE-NEXT: [[TMP5:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[SYSTEM_DATA_ALLOCA]]) -; SAVESTATE-NEXT: [[I:%.*]] = extractelement <3 x i32> [[TMP5]], i8 0 -; SAVESTATE-NEXT: [[UNPACKED:%.*]] = call [[DX_TYPES_FOURI32:%.*]] @dx.op.unpack4x8.i32(i32 219, i8 1, i32 [[I]]) -; SAVESTATE-NEXT: [[HANDLE0:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 -; SAVESTATE-NEXT: [[HANDLE1:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[HANDLE0]]) -; SAVESTATE-NEXT: [[HANDLE2:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[HANDLE1]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) -; SAVESTATE-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT]], 0 -; SAVESTATE-NEXT: store i32 [[TMP4]], ptr addrspace(20) @REGISTERS, align 4 -; SAVESTATE-NEXT: [[TMP6:%.*]] = load i32, ptr [[CSP]], align 4 -; SAVESTATE-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], 8 -; SAVESTATE-NEXT: store i32 [[TMP7]], ptr [[CSP]], align 4 -; SAVESTATE-NEXT: [[TMP8:%.*]] = load i32, ptr [[CSP]], align 4 -; SAVESTATE-NEXT: [[TMP9:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @called.resume.0 to i64)) -; SAVESTATE-NEXT: call void (i64, ...) 
@continuation.continue(i64 2, i32 [[TMP8]], i64 [[TMP9]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]]), !continuation.registercount [[META17]], !continuation.returnedRegistercount !17 -; SAVESTATE-NEXT: unreachable -; -; -; SAVESTATE-LABEL: define dso_local void @called.resume.0( -; SAVESTATE-SAME: i32 [[TMP0:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP1:%.*]]) !continuation [[META15]] !lgc.rt.shaderstage [[META16]] !continuation.registercount [[META17]] { -; SAVESTATE-NEXT: entryresume.0: -; SAVESTATE-NEXT: [[CSP:%.*]] = alloca i32, align 4 -; SAVESTATE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 -; SAVESTATE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP1]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; SAVESTATE-NEXT: store i32 [[TMP0]], ptr [[CSP]], align 4 -; SAVESTATE-NEXT: [[TMP2:%.*]] = load i32, ptr [[CSP]], align 4 -; SAVESTATE-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], -8 -; SAVESTATE-NEXT: store i32 [[TMP3]], ptr [[CSP]], align 4 -; SAVESTATE-NEXT: [[TMP4:%.*]] = load i32, ptr [[CSP]], align 4 -; SAVESTATE-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP4]] to ptr addrspace(21) -; SAVESTATE-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP5]], i64 0 -; SAVESTATE-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; SAVESTATE-NEXT: [[DOTFCA_0_EXTRACT3:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP1]], 0 -; SAVESTATE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; SAVESTATE-NEXT: [[RETURNADDR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[CALLED_FRAME:%.*]], ptr addrspace(21) [[TMP6]], i32 0, i32 0 -; SAVESTATE-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(21) [[RETURNADDR_RELOAD_ADDR]], align 4 -; SAVESTATE-NEXT: [[HANDLE011:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 -; SAVESTATE-NEXT: [[HANDLE110:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] 
[[HANDLE011]]) -; SAVESTATE-NEXT: [[HANDLE29:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[HANDLE110]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) -; SAVESTATE-NEXT: [[TMP8:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[SYSTEM_DATA_ALLOCA]]) -; SAVESTATE-NEXT: [[I8:%.*]] = extractelement <3 x i32> [[TMP8]], i8 0 -; SAVESTATE-NEXT: [[UNPACKED7:%.*]] = call [[DX_TYPES_FOURI32:%.*]] @dx.op.unpack4x8.i32(i32 219, i8 1, i32 [[I8]]) -; SAVESTATE-NEXT: [[TMP9:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[SYSTEM_DATA_ALLOCA]]) -; SAVESTATE-NEXT: [[I6:%.*]] = extractelement <3 x i32> [[TMP9]], i8 0 -; SAVESTATE-NEXT: [[UNPACKED5:%.*]] = call [[DX_TYPES_FOURI32]] @dx.op.unpack4x8.i32(i32 219, i8 1, i32 [[I6]]) -; SAVESTATE-NEXT: [[TMP10:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[SYSTEM_DATA_ALLOCA]]) -; SAVESTATE-NEXT: [[I4:%.*]] = extractelement <3 x i32> [[TMP10]], i8 0 -; SAVESTATE-NEXT: [[UNPACKED3:%.*]] = call [[DX_TYPES_FOURI32]] @dx.op.unpack4x8.i32(i32 219, i8 1, i32 [[I4]]) -; SAVESTATE-NEXT: [[TMP11:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[SYSTEM_DATA_ALLOCA]]) -; SAVESTATE-NEXT: [[I2:%.*]] = extractelement <3 x i32> [[TMP11]], i8 0 -; SAVESTATE-NEXT: [[UNPACKED1:%.*]] = call [[DX_TYPES_FOURI32]] @dx.op.unpack4x8.i32(i32 219, i8 1, i32 [[I2]]) -; SAVESTATE-NEXT: [[A:%.*]] = extractvalue [[DX_TYPES_FOURI32]] [[UNPACKED7]], 0 -; SAVESTATE-NEXT: [[B:%.*]] = extractvalue [[DX_TYPES_FOURI32]] [[UNPACKED5]], 1 -; SAVESTATE-NEXT: [[C:%.*]] = extractvalue [[DX_TYPES_FOURI32]] [[UNPACKED3]], 2 -; SAVESTATE-NEXT: [[D:%.*]] = extractvalue [[DX_TYPES_FOURI32]] [[UNPACKED1]], 3 -; SAVESTATE-NEXT: [[PACKED:%.*]] = call i32 @dx.op.pack4x8.i32(i32 220, i8 0, i32 [[A]], i32 [[B]], i32 [[C]], i32 [[D]]) -; SAVESTATE-NEXT: call void @dx.op.textureStore.f32(i32 67, [[DX_TYPES_HANDLE]] [[HANDLE29]], i32 0, i32 0, i32 undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 
1.000000e+00, i8 15) -; SAVESTATE-NEXT: store i32 [[PACKED]], ptr addrspace(20) @REGISTERS, align 4 -; SAVESTATE-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT3]], 0 -; SAVESTATE-NEXT: [[TMP12:%.*]] = load i32, ptr [[CSP]], align 4 -; SAVESTATE-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP12]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META17]] -; SAVESTATE-NEXT: unreachable -; diff --git a/shared/continuations/unittests/CMakeLists.txt b/shared/continuations/unittests/CMakeLists.txt deleted file mode 100644 index 20df42a3a5..0000000000 --- a/shared/continuations/unittests/CMakeLists.txt +++ /dev/null @@ -1,42 +0,0 @@ -# Continuations Unit tests. -# To execute all unit tests, run: -# cmake --build . --target check-continuations-units - -add_custom_target(ContinuationsUnitTests) -set_target_properties(ContinuationsUnitTests PROPERTIES FOLDER "Continuations Tests") - -function(add_continuations_unittest test_dirname) - add_unittest(ContinuationsUnitTests ${test_dirname} ${ARGN}) -endfunction() - -# Add a LIT target to execute all unit tests. -# Required by lit.site.cfg.py.in. -set(CONTINUATIONS_UNIT_TEST_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) -set(CONTINUATIONS_UNIT_TEST_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) - -# Main config for unit tests. 
-configure_lit_site_cfg( - ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in - ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg.py - MAIN_CONFIG - ${CMAKE_CURRENT_SOURCE_DIR}/lit.cfg.py -) - -add_lit_testsuite(check-continuations-units "Running the Continuations unit tests" - ${CMAKE_CURRENT_BINARY_DIR} - ${exclude_from_check_all} - DEPENDS - ContinuationsUnitTests -) - -add_continuations_unittest(ContinuationsUnitTargetTests - RemainingArgumentDwordTests.cpp -) - -target_link_libraries(ContinuationsUnitTargetTests PRIVATE - LLVMContinuations - LLVMCore - LLVMSupport -) - -set_compiler_options(ContinuationsUnitTargetTests) diff --git a/tool/dumper/vkgcPipelineDumper.cpp b/tool/dumper/vkgcPipelineDumper.cpp index 32d7c8ea18..105be4f11f 100644 --- a/tool/dumper/vkgcPipelineDumper.cpp +++ b/tool/dumper/vkgcPipelineDumper.cpp @@ -629,30 +629,33 @@ void PipelineDumper::dumpPipelineShaderInfo(const PipelineShaderInfo *shaderInfo // Output pipeline shader options // clang-format off + char clientHashHexValue[64] = {}; + snprintf(clientHashHexValue, 64, "0x%" PRIX64 ", 0x%" PRIX64, + shaderInfo->options.clientHash.lower, shaderInfo->options.clientHash.upper); + dumpFile << "options.clientHash = " << clientHashHexValue << "\n"; dumpFile << "options.trapPresent = " << shaderInfo->options.trapPresent << "\n"; dumpFile << "options.debugMode = " << shaderInfo->options.debugMode << "\n"; dumpFile << "options.enablePerformanceData = " << shaderInfo->options.enablePerformanceData << "\n"; dumpFile << "options.allowReZ = " << shaderInfo->options.allowReZ << "\n"; - dumpFile << "options.forceLateZ = " << shaderInfo->options.forceLateZ << "\n"; dumpFile << "options.vgprLimit = " << shaderInfo->options.vgprLimit << "\n"; dumpFile << "options.sgprLimit = " << shaderInfo->options.sgprLimit << "\n"; dumpFile << "options.maxThreadGroupsPerComputeUnit = " << shaderInfo->options.maxThreadGroupsPerComputeUnit << "\n"; - dumpFile << "options.waveSize = " << shaderInfo->options.waveSize << "\n"; dumpFile 
<< "options.subgroupSize = " << shaderInfo->options.subgroupSize << "\n"; + dumpFile << "options.waveSize = " << shaderInfo->options.waveSize << "\n"; dumpFile << "options.wgpMode = " << shaderInfo->options.wgpMode << "\n"; dumpFile << "options.waveBreakSize = " << shaderInfo->options.waveBreakSize << "\n"; dumpFile << "options.forceLoopUnrollCount = " << shaderInfo->options.forceLoopUnrollCount << "\n"; + dumpFile << "options.enableLoadScalarizer = " << shaderInfo->options.enableLoadScalarizer << "\n"; + dumpFile << "options.allowVaryWaveSize = " << shaderInfo->options.allowVaryWaveSize << "\n"; dumpFile << "options.useSiScheduler = " << shaderInfo->options.useSiScheduler << "\n"; dumpFile << "options.disableCodeSinking = " << shaderInfo->options.disableCodeSinking << "\n"; dumpFile << "options.favorLatencyHiding = " << shaderInfo->options.favorLatencyHiding << "\n"; - dumpFile << "options.allowVaryWaveSize = " << shaderInfo->options.allowVaryWaveSize << "\n"; - dumpFile << "options.enableLoadScalarizer = " << shaderInfo->options.enableLoadScalarizer << "\n"; dumpFile << "options.disableLicm = " << shaderInfo->options.disableLicm << "\n"; dumpFile << "options.unrollThreshold = " << shaderInfo->options.unrollThreshold << "\n"; dumpFile << "options.scalarThreshold = " << shaderInfo->options.scalarThreshold << "\n"; dumpFile << "options.disableLoopUnroll = " << shaderInfo->options.disableLoopUnroll << "\n"; - dumpFile << "options.fp32DenormalMode = " << shaderInfo->options.fp32DenormalMode << "\n"; dumpFile << "options.adjustDepthImportVrs = " << shaderInfo->options.adjustDepthImportVrs << "\n"; + dumpFile << "options.fp32DenormalMode = " << shaderInfo->options.fp32DenormalMode << "\n"; dumpFile << "options.disableLicmThreshold = " << shaderInfo->options.disableLicmThreshold << "\n"; dumpFile << "options.unrollHintThreshold = " << shaderInfo->options.unrollHintThreshold << "\n"; dumpFile << "options.dontUnrollHintThreshold = " << 
shaderInfo->options.dontUnrollHintThreshold << "\n"; @@ -661,17 +664,20 @@ void PipelineDumper::dumpPipelineShaderInfo(const PipelineShaderInfo *shaderInfo dumpFile << "options.disableFastMathFlags = " << shaderInfo->options.disableFastMathFlags << "\n"; dumpFile << "options.ldsSpillLimitDwords = " << shaderInfo->options.ldsSpillLimitDwords << "\n"; dumpFile << "options.scalarizeWaterfallLoads = " << shaderInfo->options.scalarizeWaterfallLoads << "\n"; + dumpFile << "options.overrideForceThreadIdSwizzling = " << shaderInfo->options.overrideForceThreadIdSwizzling << "\n"; dumpFile << "options.overrideShaderThreadGroupSizeX = " << shaderInfo->options.overrideShaderThreadGroupSizeX << "\n"; dumpFile << "options.overrideShaderThreadGroupSizeY = " << shaderInfo->options.overrideShaderThreadGroupSizeY << "\n"; dumpFile << "options.overrideShaderThreadGroupSizeZ = " << shaderInfo->options.overrideShaderThreadGroupSizeZ << "\n"; + dumpFile << "options.forceLateZ = " << shaderInfo->options.forceLateZ << "\n"; dumpFile << "options.nsaThreshold = " << shaderInfo->options.nsaThreshold << "\n"; dumpFile << "options.aggressiveInvariantLoads = " << shaderInfo->options.aggressiveInvariantLoads << "\n"; dumpFile << "options.workaroundStorageImageFormats = " << shaderInfo->options.workaroundStorageImageFormats << "\n"; dumpFile << "options.workaroundInitializeOutputsToZero = " << shaderInfo->options.workaroundInitializeOutputsToZero << "\n"; dumpFile << "options.disableFMA = " << shaderInfo->options.disableFMA << "\n"; - dumpFile << "options.constantBufferBindingOffset = " << shaderInfo->options.constantBufferBindingOffset << "\n"; + dumpFile << "options.disableReadFirstLaneWorkaround = " << shaderInfo->options.disableReadFirstLaneWorkaround << "\n"; dumpFile << "options.backwardPropagateNoContract = " << shaderInfo->options.backwardPropagateNoContract << "\n"; dumpFile << "options.forwardPropagateNoContract = " << shaderInfo->options.forwardPropagateNoContract << "\n"; + dumpFile 
<< "options.constantBufferBindingOffset = " << shaderInfo->options.constantBufferBindingOffset << "\n"; dumpFile << "\n"; // clang-format on } @@ -827,7 +833,6 @@ void PipelineDumper::dumpComputeStateInfo(const ComputePipelineBuildInfo *pipeli dumpRayTracingRtState(&pipelineInfo->rtState, dumpDir, dumpFile); if (pipelineInfo->pUniformMap) { - dumpFile << "\n[UniformConstant]\n"; dumpFile << "uniformConstantMaps[0].visibility = " << pipelineInfo->pUniformMap->visibility << "\n"; UniformConstantMapEntry *locationOffsetMap = pipelineInfo->pUniformMap->pUniforms; for (unsigned i = 0; i < pipelineInfo->pUniformMap->numUniformConstants; i++) { @@ -847,14 +852,14 @@ void PipelineDumper::dumpComputeStateInfo(const ComputePipelineBuildInfo *pipeli void PipelineDumper::dumpPipelineOptions(const PipelineOptions *options, std::ostream &dumpFile) { dumpFile << "options.includeDisassembly = " << options->includeDisassembly << "\n"; dumpFile << "options.scalarBlockLayout = " << options->scalarBlockLayout << "\n"; - dumpFile << "options.resourceLayoutScheme = " << options->resourceLayoutScheme << "\n"; - dumpFile << "options.includeIr = " << options->includeIr << "\n"; - dumpFile << "options.robustBufferAccess = " << options->robustBufferAccess << "\n"; dumpFile << "options.reconfigWorkgroupLayout = " << options->reconfigWorkgroupLayout << "\n"; dumpFile << "options.forceCsThreadIdSwizzling = " << options->forceCsThreadIdSwizzling << "\n"; - dumpFile << "options.overrideThreadGroupSizeX = " << options->overrideThreadGroupSizeX << "\n"; - dumpFile << "options.overrideThreadGroupSizeY = " << options->overrideThreadGroupSizeY << "\n"; - dumpFile << "options.overrideThreadGroupSizeZ = " << options->overrideThreadGroupSizeZ << "\n"; + dumpFile << "options.includeIr = " << options->includeIr << "\n"; + dumpFile << "options.robustBufferAccess = " << options->robustBufferAccess << "\n"; + dumpFile << "options.enableRelocatableShaderElf = " << options->enableRelocatableShaderElf << "\n"; + 
dumpFile << "options.disableImageResourceCheck = " << options->disableImageResourceCheck << "\n"; + dumpFile << "options.enableScratchAccessBoundsChecks = " << options->enableScratchAccessBoundsChecks << "\n"; + dumpFile << "options.enableImplicitInvariantExports = " << options->enableImplicitInvariantExports << "\n"; dumpFile << "options.shadowDescriptorTableUsage = " << options->shadowDescriptorTableUsage << "\n"; dumpFile << "options.shadowDescriptorTablePtrHigh = " << options->shadowDescriptorTablePtrHigh << "\n"; dumpFile << "options.extendedRobustness.robustBufferAccess = " << options->extendedRobustness.robustBufferAccess @@ -862,11 +867,17 @@ void PipelineDumper::dumpPipelineOptions(const PipelineOptions *options, std::os dumpFile << "options.extendedRobustness.robustImageAccess = " << options->extendedRobustness.robustImageAccess << "\n"; dumpFile << "options.extendedRobustness.nullDescriptor = " << options->extendedRobustness.nullDescriptor << "\n"; + dumpFile << "options.enableRayQuery = " << options->enableRayQuery << "\n"; dumpFile << "options.optimizeTessFactor = " << options->optimizeTessFactor << "\n"; + dumpFile << "options.enableInterpModePatch = " << options->enableInterpModePatch << "\n"; + dumpFile << "options.pageMigrationEnabled = " << options->pageMigrationEnabled << "\n"; dumpFile << "options.optimizationLevel = " << options->optimizationLevel << "\n"; + dumpFile << "options.overrideThreadGroupSizeX = " << options->overrideThreadGroupSizeX << "\n"; + dumpFile << "options.overrideThreadGroupSizeY = " << options->overrideThreadGroupSizeY << "\n"; + dumpFile << "options.overrideThreadGroupSizeZ = " << options->overrideThreadGroupSizeZ << "\n"; + dumpFile << "options.resourceLayoutScheme = " << options->resourceLayoutScheme << "\n"; dumpFile << "options.threadGroupSwizzleMode = " << options->threadGroupSwizzleMode << "\n"; dumpFile << "options.reverseThreadGroup = " << options->reverseThreadGroup << "\n"; - dumpFile << 
"options.enableImplicitInvariantExports = " << options->enableImplicitInvariantExports << "\n"; dumpFile << "options.internalRtShaders = " << options->internalRtShaders << "\n"; dumpFile << "options.forceNonUniformResourceIndexStageMask = " << options->forceNonUniformResourceIndexStageMask << "\n"; @@ -919,6 +930,7 @@ void PipelineDumper::dumpGraphicsStateInfo(const GraphicsPipelineBuildInfo *pipe dumpFile << "disableVertexReuse = " << pipelineInfo->iaState.disableVertexReuse << "\n"; dumpFile << "switchWinding = " << pipelineInfo->iaState.switchWinding << "\n"; dumpFile << "enableMultiView = " << pipelineInfo->iaState.enableMultiView << "\n"; + dumpFile << "useVertexBufferDescArray = " << pipelineInfo->iaState.useVertexBufferDescArray << "\n"; if (pipelineInfo->iaState.tessLevel) { dumpFile << "tessLevelInner[0] = " << pipelineInfo->iaState.tessLevel->inner[0] << "\n"; dumpFile << "tessLevelInner[1] = " << pipelineInfo->iaState.tessLevel->inner[1] << "\n"; @@ -930,6 +942,7 @@ void PipelineDumper::dumpGraphicsStateInfo(const GraphicsPipelineBuildInfo *pipe dumpFile << "depthClipEnable = " << pipelineInfo->vpState.depthClipEnable << "\n"; dumpFile << "rasterizerDiscardEnable = " << pipelineInfo->rsState.rasterizerDiscardEnable << "\n"; + dumpFile << "innerCoverage = " << pipelineInfo->rsState.innerCoverage << "\n"; dumpFile << "perSampleShading = " << pipelineInfo->rsState.perSampleShading << "\n"; dumpFile << "numSamples = " << pipelineInfo->rsState.numSamples << "\n"; dumpFile << "pixelShaderSamples = " << pipelineInfo->rsState.pixelShaderSamples << "\n"; @@ -966,12 +979,53 @@ void PipelineDumper::dumpGraphicsStateInfo(const GraphicsPipelineBuildInfo *pipe dumpFile << "nggState.subgroupSizing = " << pipelineInfo->nggState.subgroupSizing << "\n"; dumpFile << "nggState.primsPerSubgroup = " << pipelineInfo->nggState.primsPerSubgroup << "\n"; dumpFile << "nggState.vertsPerSubgroup = " << pipelineInfo->nggState.vertsPerSubgroup << "\n"; + dumpFile << "unlinked = " << 
pipelineInfo->unlinked << "\n"; dumpFile << "dynamicVertexStride = " << pipelineInfo->dynamicVertexStride << "\n"; dumpFile << "enableUberFetchShader = " << pipelineInfo->enableUberFetchShader << "\n"; dumpFile << "enableEarlyCompile = " << pipelineInfo->enableEarlyCompile << "\n"; dumpFile << "enableColorExportShader = " << pipelineInfo->enableColorExportShader << "\n"; dumpFile << "useSoftwareVertexBufferDescriptors = " << pipelineInfo->useSoftwareVertexBufferDescriptors << "\n"; - dumpFile << "vbAddressLowBitsKnown = " << pipelineInfo->vbAddressLowBitsKnown << "\n"; + + dumpFile << "originUpperLeft = " << pipelineInfo->getGlState().originUpperLeft << "\n"; + if (pipelineInfo->clientMetadataSize > 0) { + dumpFile << "clientMetadata = "; + const uint8_t *pClientMetadata = reinterpret_cast(pipelineInfo->pClientMetadata); + for (unsigned i = 0; i < pipelineInfo->clientMetadataSize - 1; i++) + dumpFile << pClientMetadata[i] << ","; + dumpFile << pClientMetadata[pipelineInfo->clientMetadataSize - 1] << "\n"; + } + + if (pipelineInfo->getGlState().numUniformConstantMaps != 0) { + for (unsigned s = 0; s < pipelineInfo->getGlState().numUniformConstantMaps; s++) { + if (!pipelineInfo->getGlState().ppUniformMaps[s]) + continue; + dumpFile << "uniformConstantMaps[" << s + << "].visibility = " << pipelineInfo->getGlState().ppUniformMaps[s]->visibility << "\n"; + UniformConstantMapEntry *locationOffsetMap = pipelineInfo->getGlState().ppUniformMaps[s]->pUniforms; + for (unsigned i = 0; i < pipelineInfo->getGlState().ppUniformMaps[s]->numUniformConstants; i++) { + dumpFile << "uniformConstantMaps[" << s << "].uniformConstants[" << i + << "].location = " << locationOffsetMap[i].location << "\n"; + dumpFile << "uniformConstantMaps[" << s << "].uniformConstants[" << i + << "].offset = " << locationOffsetMap[i].offset << "\n"; + } + } + } + + dumpFile << "forceDisableStreamOut = " << pipelineInfo->getGlState().apiXfbOutData.forceDisableStreamOut << "\n"; +#if 
LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 70 + dumpFile << "forceEnablePrimStats = " << pipelineInfo->apiXfbOutData.forceEnablePrimStats << "\n"; +#endif + const auto pXfbOutInfos = pipelineInfo->getGlState().apiXfbOutData.pXfbOutInfos; + for (unsigned idx = 0; idx < pipelineInfo->getGlState().apiXfbOutData.numXfbOutInfo; ++idx) { + dumpFile << "xfbOutInfo[" << idx << "].isBuiltIn = " << pXfbOutInfos[idx].isBuiltIn << "\n"; + dumpFile << "xfbOutInfo[" << idx << "].location = " << pXfbOutInfos[idx].location << "\n"; + dumpFile << "xfbOutInfo[" << idx << "].component = " << pXfbOutInfos[idx].component << "\n"; + dumpFile << "xfbOutInfo[" << idx << "].xfbBuffer = " << pXfbOutInfos[idx].xfbBuffer << "\n"; + dumpFile << "xfbOutInfo[" << idx << "].xfbOffset = " << pXfbOutInfos[idx].xfbOffset << "\n"; + dumpFile << "xfbOutInfo[" << idx << "].xfbStride = " << pXfbOutInfos[idx].xfbStride << "\n"; + dumpFile << "xfbOutInfo[" << idx << "].streamId = " << pXfbOutInfos[idx].streamId << "\n"; + } + dumpFile << "vbAddressLowBitsKnown = " << pipelineInfo->getGlState().vbAddressLowBitsKnown << "\n"; dumpPipelineOptions(&pipelineInfo->options, dumpFile); #if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 62 @@ -1006,9 +1060,8 @@ void PipelineDumper::dumpGraphicsStateInfo(const GraphicsPipelineBuildInfo *pipe dumpFile << "attribute[" << i << "].binding = " << attrib->binding << "\n"; dumpFile << "attribute[" << i << "].format = " << attrib->format << "\n"; dumpFile << "attribute[" << i << "].offset = " << attrib->offset << "\n"; - dumpFile << "attribute[" << i - << "].vbAddressLowBits = " << static_cast((pipelineInfo->vbAddressLowBits[attrib->binding])) - << "\n"; + dumpFile << "attribute[" << i << "].vbAddressLowBits = " + << static_cast((pipelineInfo->getGlState().vbAddressLowBits[attrib->binding])) << "\n"; } auto divisorState = findVkStructInChain( @@ -1020,39 +1073,6 @@ void PipelineDumper::dumpGraphicsStateInfo(const GraphicsPipelineBuildInfo *pipe dumpFile << "divisor[" << i << "].divisor 
= " << divisor->divisor << "\n"; } } - - if (pipelineInfo->numUniformConstantMaps != 0) { - dumpFile << "\n[UniformConstant]\n"; - for (unsigned s = 0; s < pipelineInfo->numUniformConstantMaps; s++) { - if (!pipelineInfo->ppUniformMaps[s]) - continue; - dumpFile << "uniformConstantMaps[" << s << "].visibility = " << pipelineInfo->ppUniformMaps[s]->visibility - << "\n"; - UniformConstantMapEntry *locationOffsetMap = pipelineInfo->ppUniformMaps[s]->pUniforms; - for (unsigned i = 0; i < pipelineInfo->ppUniformMaps[s]->numUniformConstants; i++) { - dumpFile << "uniformConstantMaps[" << s << "].uniformConstants[" << i - << "].location = " << locationOffsetMap[i].location << "\n"; - dumpFile << "uniformConstantMaps[" << s << "].uniformConstants[" << i - << "].offset = " << locationOffsetMap[i].offset << "\n"; - } - } - } - - dumpFile << "\n[ApiXfbOutInfo]\n"; - dumpFile << "forceDisableStreamOut = " << pipelineInfo->apiXfbOutData.forceDisableStreamOut << "\n"; -#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 70 - dumpFile << "forceEnablePrimStats = " << pipelineInfo->apiXfbOutData.forceEnablePrimStats << "\n"; -#endif - const auto pXfbOutInfos = pipelineInfo->apiXfbOutData.pXfbOutInfos; - for (unsigned idx = 0; idx < pipelineInfo->apiXfbOutData.numXfbOutInfo; ++idx) { - dumpFile << "xfbOutInfo[" << idx << "].isBuiltIn = " << pXfbOutInfos[idx].isBuiltIn << "\n"; - dumpFile << "xfbOutInfo[" << idx << "].location = " << pXfbOutInfos[idx].location << "\n"; - dumpFile << "xfbOutInfo[" << idx << "].component = " << pXfbOutInfos[idx].component << "\n"; - dumpFile << "xfbOutInfo[" << idx << "].xfbBuffer = " << pXfbOutInfos[idx].xfbBuffer << "\n"; - dumpFile << "xfbOutInfo[" << idx << "].xfbOffset = " << pXfbOutInfos[idx].xfbOffset << "\n"; - dumpFile << "xfbOutInfo[" << idx << "].xfbStride = " << pXfbOutInfos[idx].xfbStride << "\n"; - dumpFile << "xfbOutInfo[" << idx << "].streamId = " << pXfbOutInfos[idx].streamId << "\n"; - } } // 
===================================================================================================================== @@ -1182,20 +1202,19 @@ void PipelineDumper::dumpRayTracingStateInfo(const RayTracingPipelineBuildInfo * // @param dumpDir : Directory of pipeline dump // @param dumpFile : Pipeline dump file void PipelineDumper::dumpRayTracingRtState(const RtState *rtState, const char *dumpDir, std::ostream &dumpStream) { + dumpStream << "rtState.nodeStrideShift = " << rtState->nodeStrideShift << "\n"; dumpStream << "rtState.bvhResDescSize = " << rtState->bvhResDesc.dataSizeInDwords << "\n"; for (unsigned i = 0; i < rtState->bvhResDesc.dataSizeInDwords; ++i) dumpStream << "rtState.bvhResDesc[" << i << "] = " << rtState->bvhResDesc.descriptorData[i] << "\n"; - - dumpStream << "rtState.nodeStrideShift = " << rtState->nodeStrideShift << "\n"; dumpStream << "rtState.staticPipelineFlags = " << rtState->staticPipelineFlags << "\n"; dumpStream << "rtState.triCompressMode = " << rtState->triCompressMode << "\n"; + dumpStream << "rtState.boxSortHeuristicMode = " << rtState->boxSortHeuristicMode << "\n"; dumpStream << "rtState.pipelineFlags = " << rtState->pipelineFlags << "\n"; + dumpStream << "rtState.counterMode = " << rtState->counterMode << "\n"; + dumpStream << "rtState.counterMask = " << rtState->counterMask << "\n"; dumpStream << "rtState.threadGroupSizeX = " << rtState->threadGroupSizeX << "\n"; dumpStream << "rtState.threadGroupSizeY = " << rtState->threadGroupSizeY << "\n"; dumpStream << "rtState.threadGroupSizeZ = " << rtState->threadGroupSizeZ << "\n"; - dumpStream << "rtState.boxSortHeuristicMode = " << rtState->boxSortHeuristicMode << "\n"; - dumpStream << "rtState.counterMode = " << rtState->counterMode << "\n"; - dumpStream << "rtState.counterMask = " << rtState->counterMask << "\n"; dumpStream << "rtState.rayQueryCsSwizzle = " << rtState->rayQueryCsSwizzle << "\n"; dumpStream << "rtState.ldsStackSize = " << rtState->ldsStackSize << "\n"; dumpStream << 
"rtState.dispatchRaysThreadGroupSize = " << rtState->dispatchRaysThreadGroupSize << "\n"; @@ -1221,6 +1240,8 @@ void PipelineDumper::dumpRayTracingRtState(const RtState *rtState, const char *d dumpStream << "rtState.exportConfig.enableUniformNoReturn = " << rtState->exportConfig.enableUniformNoReturn << "\n"; dumpStream << "rtState.exportConfig.enableTraceRayArgsInLds = " << rtState->exportConfig.enableTraceRayArgsInLds << "\n"; + dumpStream << "rtState.exportConfig.enableReducedLinkageOpt = " << rtState->exportConfig.enableReducedLinkageOpt + << "\n"; dumpStream << "rtState.exportConfig.readsDispatchRaysIndex = " << rtState->exportConfig.readsDispatchRaysIndex << "\n"; dumpStream << "rtState.exportConfig.enableDynamicLaunch = " << rtState->exportConfig.enableDynamicLaunch << "\n"; @@ -1312,19 +1333,21 @@ void PipelineDumper::dumpRayTracingLibrarySummary(PipelineDumpFile *dumpFile, co // Update hash code for the pipeline rtstate // // @param rtState : Pipeline rtstate -// @param [in,out] hasher : Haher to generate hash code +// @param [in,out] hasher : Hasher to generate hash code // @param isCacheHash : TRUE if hash is used by the shader cache void PipelineDumper::updateHashForRtState(const RtState *rtState, MetroHash64 *hasher, bool isCacheHash) { hasher->Update(rtState->nodeStrideShift); + for (unsigned i = 0; i < rtState->bvhResDesc.dataSizeInDwords; ++i) + hasher->Update(rtState->bvhResDesc.descriptorData[i]); hasher->Update(rtState->staticPipelineFlags); hasher->Update(rtState->triCompressMode); + hasher->Update(rtState->boxSortHeuristicMode); + hasher->Update(rtState->pipelineFlags); + hasher->Update(rtState->counterMode); + hasher->Update(rtState->counterMask); hasher->Update(rtState->threadGroupSizeX); hasher->Update(rtState->threadGroupSizeY); hasher->Update(rtState->threadGroupSizeZ); - for (unsigned i = 0; i < rtState->bvhResDesc.dataSizeInDwords; ++i) - hasher->Update(rtState->bvhResDesc.descriptorData[i]); - - hasher->Update(rtState->counterMask); 
hasher->Update(rtState->rayQueryCsSwizzle); hasher->Update(rtState->ldsStackSize); hasher->Update(rtState->dispatchRaysThreadGroupSize); @@ -1341,6 +1364,7 @@ void PipelineDumper::updateHashForRtState(const RtState *rtState, MetroHash64 *h hasher->Update(rtState->exportConfig.indirectCalleeSavedRegs.traceRays); hasher->Update(rtState->exportConfig.enableUniformNoReturn); hasher->Update(rtState->exportConfig.enableTraceRayArgsInLds); + hasher->Update(rtState->exportConfig.enableReducedLinkageOpt); hasher->Update(rtState->exportConfig.readsDispatchRaysIndex); hasher->Update(rtState->exportConfig.enableDynamicLaunch); hasher->Update(rtState->exportConfig.emitRaytracingShaderDataToken); @@ -1380,6 +1404,21 @@ void PipelineDumper::updateHashForRtState(const RtState *rtState, MetroHash64 *h } } +// ===================================================================================================================== +// Update hash code for the UniformConstantMap +// +// @param pUniformConstantMap : Pipeline uniform constant map +// @param [in,out] hasher : Hasher to generate hash code +void PipelineDumper::updateHashForUniformConstantMap(const UniformConstantMap *pUniformConstantMap, + MetroHash64 *hasher) { + static_assert(sizeof(UniformConstantMapEntry) == sizeof(unsigned) * 2, ""); + if (pUniformConstantMap->numUniformConstants > 0) { + hasher->Update(pUniformConstantMap->visibility); + hasher->Update(reinterpret_cast(pUniformConstantMap->pUniforms), + sizeof(UniformConstantMapEntry) * pUniformConstantMap->numUniformConstants); + } +} + // ===================================================================================================================== // Builds hash code from graphics pipeline build info. If stage is a specific stage of the graphics pipeline, then only // the portions of the pipeline build info that affect that stage will be included in the hash. 
Otherwise, stage must @@ -1387,7 +1426,6 @@ void PipelineDumper::updateHashForRtState(const RtState *rtState, MetroHash64 *h // // @param pipeline : Info to build a graphics pipeline // @param isCacheHash : TRUE if the hash is used by shader cache -// @param isRelocatableShader : TRUE if we are building relocatable shader // @param stage : The stage for which we are building the hash. ShaderStageInvalid if building for the entire pipeline. MetroHash::Hash PipelineDumper::generateHashForGraphicsPipeline(const GraphicsPipelineBuildInfo *pipeline, bool isCacheHash, @@ -1434,9 +1472,12 @@ MetroHash::Hash PipelineDumper::generateHashForGraphicsPipeline(const GraphicsPi if (unlinkedShaderType != UnlinkedStageFragment) { if (!pipeline->enableUberFetchShader) { updateHashForVertexInputState(pipeline->pVertexInput, pipeline->dynamicVertexStride, &hasher); - hasher.Update(pipeline->vbAddressLowBits); + if (pipeline->getGlState().vbAddressLowBitsKnown) { + hasher.Update(pipeline->getGlState().vbAddressLowBitsKnown); + hasher.Update(pipeline->getGlState().vbAddressLowBits, pipeline->pVertexInput->vertexAttributeDescriptionCount); + } } - hasher.Update(pipeline->vbAddressLowBitsKnown); + updateHashForNonFragmentState(pipeline, isCacheHash, &hasher); } @@ -1444,9 +1485,14 @@ MetroHash::Hash PipelineDumper::generateHashForGraphicsPipeline(const GraphicsPi updateHashForFragmentState(pipeline, &hasher); updateHashForRtState(&pipeline->rtState, &hasher, isCacheHash); - - if (pipeline->iaState.tessLevel) - hasher.Update(*pipeline->iaState.tessLevel); + if (pipeline->clientMetadataSize > 0) { + hasher.Update(reinterpret_cast(pipeline->pClientMetadata), pipeline->clientMetadataSize); + } + for (unsigned i = 0; i < pipeline->getGlState().numUniformConstantMaps; i++) { + if (pipeline->getGlState().ppUniformMaps[i] != nullptr) { + updateHashForUniformConstantMap(pipeline->getGlState().ppUniformMaps[i], &hasher); + } + } MetroHash::Hash hash = {}; hasher.Finalize(hash.bytes); @@ -1459,7 
+1505,6 @@ MetroHash::Hash PipelineDumper::generateHashForGraphicsPipeline(const GraphicsPi // // @param pipeline : Info to build a compute pipeline // @param isCacheHash : TRUE if the hash is used by shader cache -// @param isRelocatableShader : TRUE if we are building relocatable shader MetroHash::Hash PipelineDumper::generateHashForComputePipeline(const ComputePipelineBuildInfo *pipeline, bool isCacheHash) { MetroHash64 hasher; @@ -1477,6 +1522,14 @@ MetroHash::Hash PipelineDumper::generateHashForComputePipeline(const ComputePipe // Relocatable shaders force an unlinked compilation. hasher.Update(pipeline->unlinked); + if (pipeline->clientMetadataSize > 0) { + hasher.Update(reinterpret_cast(pipeline->pClientMetadata), pipeline->clientMetadataSize); + } + + if (pipeline->pUniformMap != nullptr) { + updateHashForUniformConstantMap(pipeline->pUniformMap, &hasher); + } + MetroHash::Hash hash = {}; hasher.Finalize(hash.bytes); @@ -1521,9 +1574,11 @@ MetroHash::Hash PipelineDumper::generateHashForRayTracingPipeline(const RayTraci hasher.Update(pipeline->libraryMode); hasher.Update(pipeline->libraryCount); for (unsigned i = 0; i < pipeline->libraryCount; ++i) { - hasher.Update(pipeline->pLibrarySummaries->codeSize); - hasher.Update(static_cast(pipeline->pLibrarySummaries->pCode), - pipeline->pLibrarySummaries->codeSize); + hasher.Update(pipeline->pLibrarySummaries[i].codeSize); + if (pipeline->pLibrarySummaries[i].codeSize > 0) { + hasher.Update(static_cast(pipeline->pLibrarySummaries[i].pCode), + pipeline->pLibrarySummaries[i].codeSize); + } } hasher.Update(pipeline->payloadSizeMaxInLib); @@ -1537,7 +1592,9 @@ MetroHash::Hash PipelineDumper::generateHashForRayTracingPipeline(const RayTraci } } #endif - + if (pipeline->clientMetadataSize > 0) { + hasher.Update(reinterpret_cast(pipeline->pClientMetadata), pipeline->clientMetadataSize); + } MetroHash::Hash hash = {}; hasher.Finalize(hash.bytes); @@ -1548,13 +1605,13 @@ MetroHash::Hash 
PipelineDumper::generateHashForRayTracingPipeline(const RayTraci // Updates hash code context for vertex input state // // @param vertexInput : Vertex input state -// @param [in/out] hasher : Haher to generate hash code +// @param [in/out] hasher : Hasher to generate hash code void PipelineDumper::updateHashForVertexInputState(const VkPipelineVertexInputStateCreateInfo *vertexInput, bool dynamicVertexStride, MetroHash64 *hasher) { if (vertexInput && vertexInput->vertexBindingDescriptionCount > 0) { hasher->Update(vertexInput->vertexBindingDescriptionCount); if (dynamicVertexStride) { - for (uint32_t i = 0; i < vertexInput->vertexBindingDescriptionCount; i++) { + for (unsigned i = 0; i < vertexInput->vertexBindingDescriptionCount; i++) { auto attribBinding = vertexInput->pVertexBindingDescriptions[i]; attribBinding.stride = 0; hasher->Update(attribBinding); @@ -1586,7 +1643,6 @@ void PipelineDumper::updateHashForVertexInputState(const VkPipelineVertexInputSt // @param pipeline : Info to build a graphics pipeline // @param isCacheHash : TRUE if the hash is used by shader cache // @param [in/out] hasher : Hasher to generate hash code -// @param isRelocatableShader : TRUE if we are building relocatable shader void PipelineDumper::updateHashForNonFragmentState(const GraphicsPipelineBuildInfo *pipeline, bool isCacheHash, MetroHash64 *hasher) { auto nggState = &pipeline->nggState; @@ -1598,7 +1654,10 @@ void PipelineDumper::updateHashForNonFragmentState(const GraphicsPipelineBuildIn hasher->Update(pipeline->rsState.provokingVertexMode); } - if (pipeline->gs.pModuleData || pipeline->tcs.pModuleData || pipeline->tes.pModuleData) + if (pipeline->gs.pModuleData || pipeline->tcs.pModuleData || pipeline->tes.pModuleData || + pipeline->gs.options.clientHash.lower != 0 || pipeline->gs.options.clientHash.upper != 0 || + pipeline->tcs.options.clientHash.lower != 0 || pipeline->tcs.options.clientHash.upper != 0 || + pipeline->tes.options.clientHash.lower != 0 || 
pipeline->tes.options.clientHash.upper != 0) hasher->Update(iaState->patchControlPoints); hasher->Update(iaState->disableVertexReuse); hasher->Update(iaState->switchWinding); @@ -1646,13 +1705,23 @@ void PipelineDumper::updateHashForNonFragmentState(const GraphicsPipelineBuildIn hasher->Update(nggState->vertsPerSubgroup); } } + hasher->Update(pipeline->useSoftwareVertexBufferDescriptors); + if (pipeline->iaState.tessLevel) + hasher->Update(*pipeline->iaState.tessLevel); - hasher->Update(pipeline->apiXfbOutData.forceDisableStreamOut); + hasher->Update(pipeline->getGlState().apiXfbOutData.forceDisableStreamOut); #if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 70 hasher->Update(pipeline->apiXfbOutData.forceEnablePrimStats); #endif - hasher->Update(pipeline->useSoftwareVertexBufferDescriptors); - hasher->Update(pipeline->vbAddressLowBitsKnown); + for (unsigned i = 0; i < pipeline->getGlState().apiXfbOutData.numXfbOutInfo; i++) { + hasher->Update(pipeline->getGlState().apiXfbOutData.pXfbOutInfos[i].isBuiltIn); + hasher->Update(pipeline->getGlState().apiXfbOutData.pXfbOutInfos[i].location); + hasher->Update(pipeline->getGlState().apiXfbOutData.pXfbOutInfos[i].component); + hasher->Update(pipeline->getGlState().apiXfbOutData.pXfbOutInfos[i].xfbBuffer); + hasher->Update(pipeline->getGlState().apiXfbOutData.pXfbOutInfos[i].xfbOffset); + hasher->Update(pipeline->getGlState().apiXfbOutData.pXfbOutInfos[i].xfbStride); + hasher->Update(pipeline->getGlState().apiXfbOutData.pXfbOutInfos[i].streamId); + } } // ===================================================================================================================== @@ -1660,7 +1729,6 @@ void PipelineDumper::updateHashForNonFragmentState(const GraphicsPipelineBuildIn // // @param pipeline : Info to build a graphics pipeline // @param [in/out] hasher : Hasher to generate hash code -// @param isRelocatableShader : TRUE if we are building relocatable shader void PipelineDumper::updateHashForFragmentState(const 
GraphicsPipelineBuildInfo *pipeline, MetroHash64 *hasher) { auto rsState = &pipeline->rsState; hasher->Update(rsState->perSampleShading); @@ -1688,6 +1756,8 @@ void PipelineDumper::updateHashForFragmentState(const GraphicsPipelineBuildInfo hasher->Update(cbState->target[i].blendSrcAlphaToColor); hasher->Update(cbState->target[i].format); } + + hasher->Update(pipeline->getGlState().originUpperLeft); } // ===================================================================================================================== @@ -1696,42 +1766,39 @@ void PipelineDumper::updateHashForFragmentState(const GraphicsPipelineBuildInfo // @param options: Pipeline options // @param [in/out] hasher : Hasher to generate hash code // @param isCacheHash : True if the hash will be used as a key for a cache lookup. -// @param isRelocatableShader : TRUE if we are building a relocatable shader // @param stage : The unlinked shader stage that should be included in the hash. void PipelineDumper::updateHashForPipelineOptions(const PipelineOptions *options, MetroHash64 *hasher, bool isCacheHash, UnlinkedShaderStage stage) { hasher->Update(options->includeDisassembly); hasher->Update(options->scalarBlockLayout); - hasher->Update(options->includeIr); - hasher->Update(options->robustBufferAccess); hasher->Update(options->reconfigWorkgroupLayout); hasher->Update(options->forceCsThreadIdSwizzling); - hasher->Update(options->overrideThreadGroupSizeX); - hasher->Update(options->overrideThreadGroupSizeY); - hasher->Update(options->overrideThreadGroupSizeZ); + hasher->Update(options->includeIr); + hasher->Update(options->robustBufferAccess); hasher->Update(options->enableRelocatableShaderElf); hasher->Update(options->disableImageResourceCheck); hasher->Update(options->enableScratchAccessBoundsChecks); hasher->Update(options->enableImplicitInvariantExports); - hasher->Update(options->resourceLayoutScheme); - hasher->Update(options->shadowDescriptorTableUsage); 
hasher->Update(options->shadowDescriptorTablePtrHigh); - hasher->Update(options->extendedRobustness.robustBufferAccess); hasher->Update(options->extendedRobustness.robustImageAccess); hasher->Update(options->extendedRobustness.nullDescriptor); + hasher->Update(options->enableRayQuery); if (stage != UnlinkedStageCompute) { hasher->Update(options->optimizeTessFactor); } - if (stage == UnlinkedStageFragment || stage == UnlinkedStageCount) { hasher->Update(options->enableInterpModePatch); hasher->Update(options->disableSampleMask); } - hasher->Update(options->pageMigrationEnabled); hasher->Update(options->optimizationLevel); + hasher->Update(options->overrideThreadGroupSizeX); + hasher->Update(options->overrideThreadGroupSizeY); + hasher->Update(options->overrideThreadGroupSizeZ); + hasher->Update(options->resourceLayoutScheme); + hasher->Update(options->threadGroupSwizzleMode); hasher->Update(options->reverseThreadGroup); hasher->Update(options->internalRtShaders); @@ -1744,6 +1811,7 @@ void PipelineDumper::updateHashForPipelineOptions(const PipelineOptions *options hasher->Update(options->enableFragColor); hasher->Update(options->disableBaseVertex); hasher->Update(options->enablePrimGeneratedQuery); + // disablePerCompFetch has been handled in updateHashForNonFragmentState } // ===================================================================================================================== @@ -1753,17 +1821,22 @@ void PipelineDumper::updateHashForPipelineOptions(const PipelineOptions *options // @param shaderInfo : Shader info in specified shader stage // @param isCacheHash : TRUE if the hash is used by shader cache // @param [in/out] hasher : Hasher to generate hash code -// @param isRelocatableShader : TRUE if we are building relocatable shader void PipelineDumper::updateHashForPipelineShaderInfo(ShaderStage stage, const PipelineShaderInfo *shaderInfo, bool isCacheHash, MetroHash64 *hasher) { - if (shaderInfo->pModuleData) { - const ShaderModuleData *moduleData 
= reinterpret_cast(shaderInfo->pModuleData); + if (shaderInfo->pModuleData || (shaderInfo->options.clientHash.lower != 0) || + (shaderInfo->options.clientHash.upper != 0)) { hasher->Update(stage); - if (isCacheHash) { - hasher->Update(static_cast(voidPtrInc(moduleData, ShaderModuleCacheHashOffset)), - sizeof(moduleData->hash)); - } else - hasher->Update(moduleData->hash); + if ((shaderInfo->options.clientHash.lower != 0) || (shaderInfo->options.clientHash.upper != 0)) { + hasher->Update(shaderInfo->options.clientHash); + } else { + const ShaderModuleData *moduleData = reinterpret_cast(shaderInfo->pModuleData); + hasher->Update(stage); + if (isCacheHash) { + hasher->Update(static_cast(voidPtrInc(moduleData, ShaderModuleCacheHashOffset)), + sizeof(moduleData->hash)); + } else + hasher->Update(moduleData->hash); + } size_t entryNameLen = 0; if (shaderInfo->pEntryTarget) { diff --git a/tool/dumper/vkgcPipelineDumper.h b/tool/dumper/vkgcPipelineDumper.h index ee69c485e9..97b49ff2f7 100644 --- a/tool/dumper/vkgcPipelineDumper.h +++ b/tool/dumper/vkgcPipelineDumper.h @@ -123,6 +123,7 @@ class PipelineDumper { static void dumpRayTracingStateInfo(const RayTracingPipelineBuildInfo *pipelineInfo, const char *dumpDir, std::ostream &dumpFile); static void updateHashForRtState(const RtState *rtState, MetroHash64 *hasher, bool isCacheHash); + static void updateHashForUniformConstantMap(const UniformConstantMap *pUniformConstantMap, MetroHash64 *hasher); static void dumpVersionInfo(std::ostream &dumpFile); static void dumpPipelineShaderInfo(const PipelineShaderInfo *shaderInfo, std::ostream &dumpFile); diff --git a/tool/vfx/vfx.h b/tool/vfx/vfx.h index c83c9dff6f..85e99a92af 100644 --- a/tool/vfx/vfx.h +++ b/tool/vfx/vfx.h @@ -66,21 +66,22 @@ typedef Vkgc::ShaderStage ShaderStage; // ===================================================================================================================== // Common definition of VfxParser -static const unsigned NativeShaderStageCount = 
8; // Number of native shader stages in Vulkan -static const unsigned MaxRenderSectionCount = 16; // Max render document section count -static const unsigned MaxBindingCount = 16; // Max binding count -static const unsigned MaxResultCount = 16; // Max result count -static const unsigned MaxPushConstRangCount = 16; // Max push const range count -static const unsigned MaxVertexBufferBindingCount = 16; // Max vertex buffer binding count -static const unsigned MaxVertexAttributeCount = 32; // Max vertex attribute count -static const unsigned MaxSpecConstantCount = 32; // Max spec constant count -static const unsigned VfxSizeOfVec4 = 16; // Ehe size of vec4 -static const unsigned VfxInvalidValue = 0xFFFFFFFF; // Invalid value -static const unsigned VfxVertexBufferSetId = 0xFFFFFFFE; // Vertex buffer set id -static const unsigned VfxIndexBufferSetId = 0xFFFFFFFD; // Index buffer set id -static const unsigned VfxDynamicArrayId = 0xFFFFFFFC; // Dynamic array id -static const size_t MaxKeyBufSize = 256; // Buffer size to parse a key-value pair key in VFX file. -static const size_t MaxLineBufSize = 65536; // Buffer size to parse a line in VFX file. +static const unsigned NativeShaderStageCount = 8; // Number of native shader stages in Vulkan +static const unsigned MaxRenderSectionCount = 16; // Max render document section count +static const unsigned MaxBindingCount = 16; // Max binding count +static const unsigned MaxResultCount = 16; // Max result count +static const unsigned MaxPushConstRangCount = 16; // Max push const range count +static const unsigned MaxVertexBufferBindingCount = 16; // Max vertex buffer binding count +static const unsigned MaxInternalVertexBufferBindingCount = 65; // Max Internal vertex buffer binding count. 
+static const unsigned MaxVertexAttributeCount = 32; // Max vertex attribute count +static const unsigned MaxSpecConstantCount = 32; // Max spec constant count +static const unsigned VfxSizeOfVec4 = 16; // The size of vec4 +static const unsigned VfxInvalidValue = 0xFFFFFFFF; // Invalid value +static const unsigned VfxVertexBufferSetId = 0xFFFFFFFE; // Vertex buffer set id +static const unsigned VfxIndexBufferSetId = 0xFFFFFFFD; // Index buffer set id +static const unsigned VfxDynamicArrayId = 0xFFFFFFFC; // Dynamic array id +static const size_t MaxKeyBufSize = 256; // Buffer size to parse a key-value pair key in VFX file. +static const size_t MaxLineBufSize = 65536; // Buffer size to parse a line in VFX file. #define VFX_ASSERT(...) assert(__VA_ARGS__); #define VFX_NEW new @@ -499,85 +500,6 @@ struct ColorBuffer { unsigned blendSrcAlphaToColor; // Whether source alpha is blended to color channels for this target at draw time }; -#if VFX_SUPPORT_VK_PIPELINE -// ===================================================================================================================== -// Represents GraphicsPipelineState section. 
-struct GraphicsPipelineState { - VkPrimitiveTopology topology; // Primitive type - VkProvokingVertexModeEXT provokingVertexMode; // Provoking vertex mode - unsigned patchControlPoints; // Patch control points - unsigned deviceIndex; // Device index for device group - unsigned disableVertexReuse; // Disable reusing vertex shader output for indexed draws - unsigned depthClipEnable; // Enable clipping based on Z coordinate - unsigned rasterizerDiscardEnable; // Kill all rasterized pixels - unsigned perSampleShading; // Enable per sample shading - unsigned numSamples; // Number of coverage samples used when rendering with this pipeline - unsigned pixelShaderSamples; // Controls the pixel shader execution rate - unsigned samplePatternIdx; // Index into the currently bound MSAA sample pattern table - unsigned dynamicSampleInfo; // Whether to enable dynamic sample - unsigned rasterStream; // Which vertex stream to rasterize - unsigned usrClipPlaneMask; // Mask to indicate the enabled user defined clip planes - unsigned alphaToCoverageEnable; // Enable alpha to coverage - unsigned dualSourceBlendEnable; // Blend state bound at draw time will use a dual source blend mode - unsigned dualSourceBlendDynamic; // Dual source blend mode is dynamically set - unsigned switchWinding; // reverse the TCS declared output primitive vertex order - unsigned enableMultiView; // Whether to enable multi-view support - Vkgc::PipelineOptions options; // Pipeline options - - Vkgc::NggState nggState; // NGG state - - ColorBuffer colorBuffer[Vkgc::MaxColorTargets]; // Color target state. -#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 62 - Vkgc::BinaryData shaderLibrary; // Shader library SPIR-V binary -#endif - Vkgc::RtState rtState; // Ray tracing state - bool dynamicVertexStride; // Dynamic Vertex input Stride is enabled. 
- bool enableUberFetchShader; // Use uber fetch shader - bool enableEarlyCompile; // Enable early compile - bool enableColorExportShader; // Enable color export shader - bool useSoftwareVertexBufferDescriptors; // Use software vertex buffer descriptors - bool vbAddressLowBitsKnown; // Vertex buffer address low bits is known - - float tessLevelInner[2]; - float tessLevelOuter[4]; -}; - -// ===================================================================================================================== -// Represents ComputePipelineState section. -struct ComputePipelineState { - unsigned deviceIndex; // Device index for device group - Vkgc::PipelineOptions options; // Pipeline options -#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 62 - Vkgc::BinaryData shaderLibrary; // Shader library SPIR-V binary -#endif - Vkgc::RtState rtState; // Ray tracing state -}; - -// ===================================================================================================================== -// Represents RayTracingPipelineState section. 
-struct RayTracingPipelineState { - unsigned deviceIndex; // Device index for device group - Vkgc::PipelineOptions options; // Pipeline options - unsigned shaderGroupCount; // Count of shader groups - VkRayTracingShaderGroupCreateInfoKHR *pShaderGroups; // An array of shader groups -#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 62 - Vkgc::BinaryData shaderTraceRay; // Trace-ray SPIR-V binary -#endif - unsigned maxRecursionDepth; // Ray tracing max recursion depth - unsigned indirectStageMask; // Trace-ray indirect stage mask - Vkgc::LlpcRaytracingMode mode; // Raytracing Compiling mode - Vkgc::RtState rtState; // Ray tracing state - unsigned payloadSizeMaxInLib; // Pipeline library maxPayloadSize - unsigned attributeSizeMaxInLib; // Pipeline library maxAttributeSize - bool hasPipelineLibrary; // Whether has pipeline library - unsigned pipelineLibStageMask; // Pipeline library stage mask - - /// Combination of GpuRt::ShaderLibraryFeatureFlag - unsigned gpurtFeatureFlags; -}; - -#endif - }; // namespace Vfx #if VFX_SUPPORT_VK_PIPELINE diff --git a/tool/vfx/vfxParser.cpp b/tool/vfx/vfxParser.cpp index 06c3a51609..a98b2efe32 100644 --- a/tool/vfx/vfxParser.cpp +++ b/tool/vfx/vfxParser.cpp @@ -66,7 +66,7 @@ bool parseFVec4(char *str, unsigned lineNum, IUFValue *output); bool parseF16Vec4(char *str, unsigned lineNum, IUFValue *output); bool parseDVec2(char *str, unsigned lineNum, IUFValue *output); -bool parseIArray(char *str, unsigned lineNum, bool isSign, std::vector &bufMem); +bool parseIArray(char *str, unsigned lineNum, MemberType type, std::vector &bufMem); bool parseI64Array(char *str, unsigned lineNum, bool isSign, std::vector &bufMem); bool parseFArray(char *str, unsigned lineNum, std::vector &bufMem); bool parseF16Array(char *str, unsigned lineNum, std::vector &bufMem); @@ -503,11 +503,12 @@ bool Document::parseKeyValue(char *key, char *valueStr, unsigned lineNum, Sectio result = accessedSectionObject->set(lineNum, memberName, arrayIndex, &value); break; } + case 
MemberTypeU8Array: case MemberTypeIArray: case MemberTypeUArray: { std::vector **ppIntData = nullptr; accessedSectionObject->getPtrOf(lineNum, memberName, true, arrayIndex, &ppIntData, &m_errorMsg); - result = parseIArray(valueStr, lineNum, valueType == MemberTypeIArray, **ppIntData); + result = parseIArray(valueStr, lineNum, valueType, **ppIntData); break; } case MemberTypeI64Array: @@ -963,10 +964,12 @@ bool parseDVec2(char *str, unsigned lineNum, IUFValue *output) { // // @param str : Input string // @param lineNum : Current line number -// @param isSign : True if it is signed integer +// @param type : Member type // @param [in/out] bufMem : Buffer data -bool parseIArray(char *str, unsigned lineNum, bool isSign, std::vector &bufMem) { +bool parseIArray(char *str, unsigned lineNum, MemberType type, std::vector &bufMem) { bool result = true; + bool isSign = type == MemberTypeIArray; + bool isByte = type == MemberTypeU8Array; std::vector numbers = split(str, ", "); for (char *number : numbers) { @@ -989,7 +992,7 @@ bool parseIArray(char *str, unsigned lineNum, bool isSign, std::vector else iVal = strtol(number, nullptr, 0); - for (unsigned i = 0; i < sizeof(val); ++i) + for (unsigned i = 0; i < (isByte ? 
1 : sizeof(val)); ++i) bufMem.push_back(val[i]); } diff --git a/tool/vfx/vfxPipelineDoc.cpp b/tool/vfx/vfxPipelineDoc.cpp index 6a91c7d584..9f6cd272c0 100644 --- a/tool/vfx/vfxPipelineDoc.cpp +++ b/tool/vfx/vfxPipelineDoc.cpp @@ -52,9 +52,6 @@ unsigned PipelineDocument::getMaxSectionCount(SectionType type) { case SectionTypeGraphicsState: maxSectionCount = 1; break; - case SectionTypeUniformConstant: - maxSectionCount = 1; - break; case SectionTypeComputeState: maxSectionCount = 1; break; @@ -76,8 +73,6 @@ unsigned PipelineDocument::getMaxSectionCount(SectionType type) { case SectionTypeShaderInfo: maxSectionCount = UINT32_MAX; break; - case SectionTypeApiXfbOutput: - maxSectionCount = 1; default: break; } @@ -107,130 +102,35 @@ VfxPipelineStatePtr PipelineDocument::getDocument() { // Section "GraphicsPipelineState" if (m_sections[SectionTypeGraphicsState].size() > 0) { - GraphicsPipelineState graphicState; m_pipelineState.pipelineType = VfxPipelineTypeGraphics; reinterpret_cast(m_sections[SectionTypeGraphicsState][0]) - ->getSubState(m_fileName, graphicState, &m_errorMsg); - auto gfxPipelineInfo = &m_pipelineState.gfxPipelineInfo; - gfxPipelineInfo->iaState.topology = graphicState.topology; - gfxPipelineInfo->rsState.provokingVertexMode = graphicState.provokingVertexMode; - gfxPipelineInfo->iaState.patchControlPoints = graphicState.patchControlPoints; - gfxPipelineInfo->iaState.deviceIndex = graphicState.deviceIndex; - gfxPipelineInfo->iaState.disableVertexReuse = graphicState.disableVertexReuse != 0; - gfxPipelineInfo->iaState.switchWinding = graphicState.switchWinding != 0; - gfxPipelineInfo->iaState.enableMultiView = graphicState.enableMultiView != 0; - gfxPipelineInfo->vpState.depthClipEnable = graphicState.depthClipEnable != 0; - gfxPipelineInfo->rsState.rasterizerDiscardEnable = graphicState.rasterizerDiscardEnable != 0; - gfxPipelineInfo->rsState.perSampleShading = graphicState.perSampleShading != 0; - gfxPipelineInfo->rsState.numSamples = 
graphicState.numSamples; - gfxPipelineInfo->rsState.pixelShaderSamples = graphicState.pixelShaderSamples; - gfxPipelineInfo->rsState.samplePatternIdx = graphicState.samplePatternIdx; - gfxPipelineInfo->rsState.dynamicSampleInfo = graphicState.dynamicSampleInfo; - gfxPipelineInfo->rsState.rasterStream = graphicState.rasterStream; - gfxPipelineInfo->rsState.usrClipPlaneMask = static_cast(graphicState.usrClipPlaneMask); - if (graphicState.tessLevelInner[0] < 0 || graphicState.tessLevelInner[1] < 0 || - graphicState.tessLevelOuter[0] < 0 || graphicState.tessLevelOuter[1] < 0 || graphicState.tessLevelOuter[2] < 0) - gfxPipelineInfo->iaState.tessLevel = nullptr; - else { - m_tessellationLevel.inner[0] = graphicState.tessLevelInner[0]; - m_tessellationLevel.inner[1] = graphicState.tessLevelInner[1]; - m_tessellationLevel.outer[0] = graphicState.tessLevelOuter[0]; - m_tessellationLevel.outer[1] = graphicState.tessLevelOuter[1]; - m_tessellationLevel.outer[2] = graphicState.tessLevelOuter[2]; - m_tessellationLevel.outer[3] = graphicState.tessLevelOuter[3]; - gfxPipelineInfo->iaState.tessLevel = &m_tessellationLevel; - } - - gfxPipelineInfo->cbState.alphaToCoverageEnable = graphicState.alphaToCoverageEnable != 0; - gfxPipelineInfo->cbState.dualSourceBlendEnable = graphicState.dualSourceBlendEnable != 0; - gfxPipelineInfo->cbState.dualSourceBlendDynamic = graphicState.dualSourceBlendDynamic != 0; - for (unsigned i = 0; i < MaxColorTargets; ++i) { - gfxPipelineInfo->cbState.target[i].format = graphicState.colorBuffer[i].format; - gfxPipelineInfo->cbState.target[i].channelWriteMask = - static_cast(graphicState.colorBuffer[i].channelWriteMask); - gfxPipelineInfo->cbState.target[i].blendEnable = graphicState.colorBuffer[i].blendEnable != 0; - gfxPipelineInfo->cbState.target[i].blendSrcAlphaToColor = graphicState.colorBuffer[i].blendSrcAlphaToColor != 0; - } - - gfxPipelineInfo->options = graphicState.options; - gfxPipelineInfo->nggState = graphicState.nggState; - 
gfxPipelineInfo->dynamicVertexStride = graphicState.dynamicVertexStride; - gfxPipelineInfo->enableUberFetchShader = graphicState.enableUberFetchShader; - gfxPipelineInfo->enableEarlyCompile = graphicState.enableEarlyCompile; - gfxPipelineInfo->enableColorExportShader = graphicState.enableColorExportShader; - gfxPipelineInfo->useSoftwareVertexBufferDescriptors = graphicState.useSoftwareVertexBufferDescriptors; - gfxPipelineInfo->vbAddressLowBitsKnown = graphicState.vbAddressLowBitsKnown; -#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 62 - gfxPipelineInfo->shaderLibrary = graphicState.shaderLibrary; -#endif - gfxPipelineInfo->rtState = graphicState.rtState; - - if (m_sections[SectionTypeUniformConstant].size() > 0) { - UniformConstantState uniformState; - reinterpret_cast(m_sections[SectionTypeUniformConstant][0])->getSubState(uniformState); - gfxPipelineInfo->numUniformConstantMaps = uniformState.numUniformConstantMaps; - gfxPipelineInfo->ppUniformMaps = uniformState.uniformMaps; - } - - if (m_sections[SectionTypeApiXfbOutput].size() > 0) { - ApiXfbOutData *apiXfbOutData = &m_pipelineState.gfxPipelineInfo.apiXfbOutData; - reinterpret_cast(m_sections[SectionTypeApiXfbOutput][0])->getSubState(*apiXfbOutData); - } + ->getSubState(m_fileName, m_pipelineState.gfxPipelineInfo, &m_errorMsg); } - // Section "ComputePipelineState" if (m_sections[SectionTypeComputeState].size() > 0) { - ComputePipelineState computeState; m_pipelineState.pipelineType = VfxPipelineTypeCompute; reinterpret_cast(m_sections[SectionTypeComputeState][0]) - ->getSubState(m_fileName, computeState, &m_errorMsg); - auto computePipelineInfo = &m_pipelineState.compPipelineInfo; - computePipelineInfo->deviceIndex = computeState.deviceIndex; - computePipelineInfo->options = computeState.options; - computePipelineInfo->cs.entryStage = Vkgc::ShaderStageCompute; -#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 62 - computePipelineInfo->shaderLibrary = computeState.shaderLibrary; -#endif - computePipelineInfo->rtState = 
computeState.rtState; - - if (m_sections[SectionTypeUniformConstant].size() > 0) { - UniformConstantState uniformState; - reinterpret_cast(m_sections[SectionTypeUniformConstant][0])->getSubState(uniformState); - assert(uniformState.numUniformConstantMaps == 1); - computePipelineInfo->pUniformMap = *uniformState.uniformMaps; - } + ->getSubState(m_fileName, m_pipelineState.compPipelineInfo, &m_errorMsg); } // Section "RayTracingPipelineState" if (m_sections[SectionTypeRayTracingState].size() > 0) { - RayTracingPipelineState rayTracingState; m_pipelineState.pipelineType = VfxPipelineTypeRayTracing; reinterpret_cast(m_sections[SectionTypeRayTracingState][0]) - ->getSubState(m_fileName, rayTracingState, &m_errorMsg); - auto rayTracingPipelineInfo = &m_pipelineState.rayPipelineInfo; - rayTracingPipelineInfo->deviceIndex = rayTracingState.deviceIndex; - rayTracingPipelineInfo->options = rayTracingState.options; - rayTracingPipelineInfo->shaderGroupCount = rayTracingState.shaderGroupCount; - rayTracingPipelineInfo->pShaderGroups = rayTracingState.pShaderGroups; -#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 62 - rayTracingPipelineInfo->shaderTraceRay = rayTracingState.shaderTraceRay; -#endif - rayTracingPipelineInfo->maxRecursionDepth = rayTracingState.maxRecursionDepth; - rayTracingPipelineInfo->indirectStageMask = rayTracingState.indirectStageMask; - rayTracingPipelineInfo->mode = rayTracingState.mode; - rayTracingPipelineInfo->rtState = rayTracingState.rtState; - rayTracingPipelineInfo->payloadSizeMaxInLib = rayTracingState.payloadSizeMaxInLib; - rayTracingPipelineInfo->attributeSizeMaxInLib = rayTracingState.attributeSizeMaxInLib; - rayTracingPipelineInfo->hasPipelineLibrary = rayTracingState.hasPipelineLibrary; - rayTracingPipelineInfo->pipelineLibStageMask = rayTracingState.pipelineLibStageMask; + ->getSubState(m_fileName, m_pipelineState.rayPipelineInfo, &m_errorMsg); } // Section "VertexInputState" if (m_sections[SectionTypeVertexInputState].size() > 0) { 
reinterpret_cast(m_sections[SectionTypeVertexInputState][0])->getSubState(m_vertexInputState); m_pipelineState.gfxPipelineInfo.pVertexInput = &m_vertexInputState; +#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 71 reinterpret_cast(m_sections[SectionTypeVertexInputState][0]) ->getvbAddressLowBits(m_pipelineState.gfxPipelineInfo.vbAddressLowBits); +#else + reinterpret_cast(m_sections[SectionTypeVertexInputState][0]) + ->getvbAddressLowBits(m_pipelineState.gfxPipelineInfo.glState.vbAddressLowBits); +#endif } if (m_pipelineState.pipelineType == VfxPipelineTypeGraphics || @@ -453,12 +353,6 @@ Section *PipelineDocument::createSection(const char *sectionName) { case SectionTypeResourceMapping: section = new SectionResourceMapping(); break; - case SectionTypeUniformConstant: - section = new SectionUniformConstant(); - break; - case SectionTypeApiXfbOutput: - section = new SectionApiXfbOutput(); - break; default: section = Document::createSection(sectionName); break; @@ -490,7 +384,6 @@ bool PipelineDocument::getPtrOfSubSection(Section *section, unsigned lineNum, co CASE_SUBSECTION(MemberTypeNggState, SectionNggState) CASE_SUBSECTION(MemberTypeUniformConstantMap, SectionUniformConstantMap) CASE_SUBSECTION(MemberTypeUniformConstantMapEntry, SectionUniformConstantMapEntry) - CASE_SUBSECTION(MemberTypeUniformConstant, SectionUniformConstant) CASE_SUBSECTION(MemberTypeXfbOutInfo, SectionXfbOutInfo) CASE_SUBSECTION(MemberTypeShaderGroup, SectionShaderGroup) CASE_SUBSECTION(MemberTypeRtState, SectionRtState) diff --git a/tool/vfx/vfxPipelineDoc.h b/tool/vfx/vfxPipelineDoc.h index 22c931fad1..6e04d98b44 100644 --- a/tool/vfx/vfxPipelineDoc.h +++ b/tool/vfx/vfxPipelineDoc.h @@ -47,8 +47,13 @@ class PipelineDocument : public Document { m_pipelineState.gfxPipelineInfo.options.optimizationLevel = 2; m_pipelineState.compPipelineInfo.options.optimizationLevel = 2; +#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 71 memset(&m_pipelineState.gfxPipelineInfo.vbAddressLowBits, 0, 
sizeof(m_pipelineState.gfxPipelineInfo.vbAddressLowBits)); +#else + memset(&m_pipelineState.gfxPipelineInfo.glState.vbAddressLowBits, 0, + sizeof(m_pipelineState.gfxPipelineInfo.glState.vbAddressLowBits)); +#endif memset(&m_vertexInputState, 0, sizeof(m_vertexInputState)); }; diff --git a/tool/vfx/vfxSection.h b/tool/vfx/vfxSection.h index 3792ada3e7..89abb90f3f 100644 --- a/tool/vfx/vfxSection.h +++ b/tool/vfx/vfxSection.h @@ -67,8 +67,6 @@ enum SectionType : unsigned { SectionTypeResourceMapping, // Resource mapping section SectionTypeUniformConstantMapEntry, // UniformConstantMapEntry section SectionTypeUniformConstantMap, // UniformConstantMap section - SectionTypeUniformConstant, // UniformConstant section - SectionTypeApiXfbOutput, // ApiXfbOutput section // GL pipeline SectionTypeGlProgramParameter, // GL program parameter section SectionTypeGlGraphicsState, // GL graphic pipeline state section @@ -92,6 +90,7 @@ enum MemberType : unsigned { MemberTypeFVec4, // VFX member type: float vec4 MemberTypeF16Vec4, // VFX member type: float16 vec4 MemberTypeDVec2, // VFX member type: double vec2 + MemberTypeU8Array, // VFX member type: byte vector (dynamic array) MemberTypeIArray, // VFX member type: int vector (dynamic array) MemberTypeUArray, // VFX member type: uint vector (dynamic array) MemberTypeI64Array, // VFX member type: int64 vector (dynamic array) @@ -161,6 +160,17 @@ template struct GetMemberHelper struct GetSubMemberHelper; +template +struct GetSubMemberHelper { + static void *getMemberPtr(void *obj) { + T *t = static_cast(obj); + return &(t->*member1.*member2); + } +}; + +// ===================================================================================================================== template struct GetSubStateMemberHelper; template struct GetSubStateMemberHelper { static void *getMemberPtr(void *obj) { @@ -169,6 +179,16 @@ template struct GetSubState } }; +// 
===================================================================================================================== +template struct GetSubStateSubMemberHelper; +template +struct GetSubStateSubMemberHelper { + static void *getMemberPtr(void *obj) { + T2 *t = static_cast(obj); + return &(t->getSubStateRef().*member1.*member2); + } +}; + // ===================================================================================================================== // Represents the info of section type struct SectionInfo { @@ -229,6 +249,23 @@ inline SectionInfo initSectionItemInfo(SectionType type, uint16_t propertyLo, ui tableItem.isSection = _isObject; \ } while (false) +// ===================================================================================================================== +// Initiates a state's member to address table +#define INIT_STATE_SUB_MEMBER_NAME_TO_ADDR(T, name, submember, type, _isObject) \ + do { \ + addrTableInitializer.push_back(StrToMemberAddr()); \ + StrToMemberAddr &tableItem = addrTableInitializer.back(); \ + tableItem.memberName = STRING(submember); \ + if (!strncmp(tableItem.memberName, "m_", 2)) \ + tableItem.memberName += 2; \ + tableItem.getMember = \ + GetSubStateSubMemberHelper::getMemberPtr; \ + tableItem.memberType = type; \ + tableItem.arrayMaxSize = 1; \ + tableItem.isSection = _isObject; \ + } while (false) + // ===================================================================================================================== // Initiates a state's member to address table with explicit name #define INIT_STATE_MEMBER_EXPLICITNAME_TO_ADDR(T, name, member, getter, type, _isObject) \ @@ -443,8 +480,12 @@ bool Section::set(unsigned lineNum, const char *memberName, unsigned arrayIndex, std::string dummyMsg; result = getPtrOf(lineNum, memberName, true, arrayIndex, &memberPtr, &dummyMsg); VFX_ASSERT(result == true); - if (result) + if (result) { + if (sizeof(TValue) == 4) { + VFX_ASSERT((reinterpret_cast(memberPtr) & 0x3) == 0); 
+ } *memberPtr = *value; + } return result; }; @@ -746,16 +787,20 @@ class SectionVertexInput : public Section { memset(&m_vkDivisorState, 0, sizeof(m_vkDivisorState)); m_vkDivisorState.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT; } - void getvbAddressLowBits(uint8_t *vbAddrLowBits) { memcpy(vbAddrLowBits, &m_vbAddressLowBits[0], 64); } + void getvbAddressLowBits(uint8_t *vbAddrLowBits) { + memcpy(vbAddrLowBits, &m_vbAddressLowBits[0], MaxInternalVertexBufferBindingCount); + } void getSubState(SubState &state) { m_vkBindings.resize(m_binding.size()); m_vkAttributes.resize(m_attribute.size()); m_vkDivisors.resize(m_divisor.size()); - m_vbAddressLowBits.resize(64); + m_vbAddressLowBits.resize(MaxInternalVertexBufferBindingCount); for (unsigned i = 0; i < m_attribute.size(); ++i) { m_attribute[i].getSubState(m_vkAttributes[i]); - m_attribute[i].getVbAddressLowBits(m_vbAddressLowBits[m_vkAttributes[i].binding]); + if (m_vkAttributes[i].binding < 64) { + m_attribute[i].getVbAddressLowBits(m_vbAddressLowBits[m_vkAttributes[i].binding]); + } } for (unsigned i = 0; i < m_binding.size(); ++i) diff --git a/tool/vfx/vfxVkSection.cpp b/tool/vfx/vfxVkSection.cpp index 76154ca8ac..595b98e3bb 100644 --- a/tool/vfx/vfxVkSection.cpp +++ b/tool/vfx/vfxVkSection.cpp @@ -39,8 +39,6 @@ class VkSectionParserInit { INIT_SECTION_INFO("missInfo", SectionTypeShaderInfo, ShaderStage::ShaderStageRayTracingMiss) INIT_SECTION_INFO("callInfo", SectionTypeShaderInfo, ShaderStage::ShaderStageRayTracingCallable) INIT_SECTION_INFO("ResourceMapping", SectionTypeResourceMapping, 0) - INIT_SECTION_INFO("UniformConstant", SectionTypeUniformConstant, 0) - INIT_SECTION_INFO("ApiXfbOutInfo", SectionTypeApiXfbOutput, 0) }; void initEnumMap() { diff --git a/tool/vfx/vfxVkSection.h b/tool/vfx/vfxVkSection.h index 4cca957f46..bb7ad97833 100644 --- a/tool/vfx/vfxVkSection.h +++ b/tool/vfx/vfxVkSection.h @@ -189,60 +189,27 @@ class SectionUniformConstantMap : public Section { }; 
// ===================================================================================================================== -// Represents default uniform constant map information in one pipeline -class SectionUniformConstant : public Section { +// Represents the sub section shader option +class SectionShaderOption : public Section { public: - typedef UniformConstantState SubState; + typedef Vkgc::PipelineShaderOptions SubState; - SectionUniformConstant() : Section(getAddrTable(), SectionTypeUniformConstant, "UniformConstant") { + SectionShaderOption() : Section(getAddrTable(), SectionTypeUnset, "options"), m_clientHash{} { memset(&m_state, 0, sizeof(m_state)); } - SubState &getSubStateRef() { return m_state; } void getSubState(SubState &state) { - m_uniformConstantMapData.resize(m_uniformConstantMaps.size()); - for (unsigned i = 0; i < m_uniformConstantMaps.size(); i++) { - auto &s = m_uniformConstantMaps[i]; - s.getSubState(m_uniformConstantMapData[i]); - m_uniformConstantMapPtr.push_back(&m_uniformConstantMapData[i]); - } - m_state.numUniformConstantMaps = m_uniformConstantMaps.size(); - m_state.uniformMaps = m_uniformConstantMapPtr.data(); + m_state.clientHash.lower = m_clientHash.i64Vec2[0]; + m_state.clientHash.upper = m_clientHash.i64Vec2[1]; state = m_state; - } - - static StrToMemberAddrArrayRef getAddrTable() { - static std::vector addrTable = []() { - std::vector addrTableInitializer; - INIT_MEMBER_DYNARRAY_NAME_TO_ADDR(SectionUniformConstant, m_uniformConstantMaps, MemberTypeUniformConstantMap, - true); - return addrTableInitializer; - }(); - return {addrTable.data(), addrTable.size()}; - } - -private: - SubState m_state; - std::vector m_uniformConstantMaps; - std::vector m_uniformConstantMapPtr; - std::vector m_uniformConstantMapData; -}; - -// ===================================================================================================================== -// Represents the sub section shader option -class SectionShaderOption : public Section { 
-public: - typedef Vkgc::PipelineShaderOptions SubState; - - SectionShaderOption() : Section(getAddrTable(), SectionTypeUnset, "options") { memset(&m_state, 0, sizeof(m_state)); } - - void getSubState(SubState &state) { state = m_state; }; + }; SubState &getSubStateRef() { return m_state; }; private: static StrToMemberAddrArrayRef getAddrTable() { static std::vector addrTable = []() { std::vector addrTableInitializer; + INIT_MEMBER_NAME_TO_ADDR(SectionShaderOption, m_clientHash, MemberTypeI64Vec2, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionShaderOption, trapPresent, MemberTypeBool, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionShaderOption, debugMode, MemberTypeBool, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionShaderOption, enablePerformanceData, MemberTypeBool, false); @@ -276,6 +243,7 @@ class SectionShaderOption : public Section { INIT_STATE_MEMBER_NAME_TO_ADDR(SectionShaderOption, disableFastMathFlags, MemberTypeInt, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionShaderOption, ldsSpillLimitDwords, MemberTypeInt, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionShaderOption, scalarizeWaterfallLoads, MemberTypeBool, false); + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionShaderOption, overrideForceThreadIdSwizzling, MemberTypeBool, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionShaderOption, overrideShaderThreadGroupSizeX, MemberTypeInt, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionShaderOption, overrideShaderThreadGroupSizeY, MemberTypeInt, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionShaderOption, overrideShaderThreadGroupSizeZ, MemberTypeInt, false); @@ -284,6 +252,9 @@ class SectionShaderOption : public Section { INIT_STATE_MEMBER_NAME_TO_ADDR(SectionShaderOption, workaroundStorageImageFormats, MemberTypeBool, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionShaderOption, workaroundInitializeOutputsToZero, MemberTypeBool, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionShaderOption, disableFMA, MemberTypeBool, false); + 
INIT_STATE_MEMBER_NAME_TO_ADDR(SectionShaderOption, disableReadFirstLaneWorkaround, MemberTypeBool, false); + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionShaderOption, backwardPropagateNoContract, MemberTypeBool, false); + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionShaderOption, forwardPropagateNoContract, MemberTypeBool, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionShaderOption, constantBufferBindingOffset, MemberTypeInt, false); return addrTableInitializer; }(); @@ -291,6 +262,7 @@ class SectionShaderOption : public Section { } SubState m_state; + IUFValue m_clientHash; }; // ===================================================================================================================== @@ -478,6 +450,9 @@ class SectionPipelineOption : public Section { INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, scalarBlockLayout, MemberTypeBool, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, includeIr, MemberTypeBool, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, robustBufferAccess, MemberTypeBool, false); + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, enableRelocatableShaderElf, MemberTypeBool, false); + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, disableImageResourceCheck, MemberTypeBool, false); + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, enableScratchAccessBoundsChecks, MemberTypeBool, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, reconfigWorkgroupLayout, MemberTypeBool, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, forceCsThreadIdSwizzling, MemberTypeBool, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, overrideThreadGroupSizeX, MemberTypeInt, false); @@ -495,11 +470,15 @@ class SectionPipelineOption : public Section { INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, enableImplicitInvariantExports, MemberTypeBool, false); // One internal member INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, internalRtShaders, MemberTypeBool, false); + 
INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, enableRayQuery, MemberTypeBool, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, optimizeTessFactor, MemberTypeBool, false); + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, enableInterpModePatch, MemberTypeBool, false); + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, pageMigrationEnabled, MemberTypeBool, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, replaceSetWithResourceType, MemberTypeBool, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, disableSampleMask, MemberTypeBool, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, buildResourcesDataForShaderModule, MemberTypeBool, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, disableTruncCoordForGather, MemberTypeBool, false); + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, enableCombinedTexture, MemberTypeBool, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, vertex64BitsAttribSingleLoc, MemberTypeBool, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, enableFragColor, MemberTypeBool, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, disableBaseVertex, MemberTypeBool, false); @@ -628,6 +607,8 @@ class SectionRayTracingShaderExportConfig : public Section { INIT_STATE_MEMBER_NAME_TO_ADDR(SectionRayTracingShaderExportConfig, enableUniformNoReturn, MemberTypeBool, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionRayTracingShaderExportConfig, enableTraceRayArgsInLds, MemberTypeBool, false); + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionRayTracingShaderExportConfig, enableReducedLinkageOpt, MemberTypeBool, + false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionRayTracingShaderExportConfig, readsDispatchRaysIndex, MemberTypeBool, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionRayTracingShaderExportConfig, enableDynamicLaunch, MemberTypeBool, false); @@ -784,71 +765,177 @@ class SectionRtState : public Section { std::vector m_bvhResDesc; }; +// 
===================================================================================================================== +// Represents the sub section XfbOutInfo +class SectionXfbOutInfo : public Section { +public: + typedef Vkgc::XfbOutInfo SubState; + + SectionXfbOutInfo() : Section(getAddrTable(), SectionTypeUnset, "XfbOutInfo") { + memset(&m_state, 0, sizeof(m_state)); + } + + void getSubState(SubState &state) { state = m_state; }; + SubState &getSubStateRef() { return m_state; }; + +private: + static StrToMemberAddrArrayRef getAddrTable() { + static std::vector addrTable = []() { + std::vector addrTableInitializer; + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionXfbOutInfo, isBuiltIn, MemberTypeBool, false); + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionXfbOutInfo, location, MemberTypeInt, false); + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionXfbOutInfo, component, MemberTypeInt, false); + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionXfbOutInfo, xfbBuffer, MemberTypeInt, false); + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionXfbOutInfo, xfbOffset, MemberTypeInt, false); + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionXfbOutInfo, xfbStride, MemberTypeInt, false); + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionXfbOutInfo, streamId, MemberTypeInt, false); + return addrTableInitializer; + }(); + return {addrTable.data(), addrTable.size()}; + } + + SubState m_state; +}; + // ===================================================================================================================== // Represents the section graphics state class SectionGraphicsState : public Section { public: - typedef GraphicsPipelineState SubState; + typedef Vkgc::GraphicsPipelineBuildInfo SubState; - SectionGraphicsState() : Section(getAddrTable(), SectionTypeGraphicsState, nullptr) { + SectionGraphicsState() + : Section(getAddrTable(), SectionTypeGraphicsState, nullptr), m_pUniformMaps{}, m_uniformMaps{} { memset(&m_state, 0, sizeof(m_state)); - tessLevelInner[0] = -1.0f; - tessLevelInner[1] = -1.0f; - tessLevelOuter[0] = -1.0f; - 
tessLevelOuter[1] = -1.0f; - tessLevelOuter[2] = -1.0f; + + m_usrClipPlaneMask = 0; + m_tessLevelInner[0] = -1.0f; + m_tessLevelInner[1] = -1.0f; + m_tessLevelOuter[0] = -1.0f; + m_tessLevelOuter[1] = -1.0f; + m_tessLevelOuter[2] = -1.0f; + m_tessLevelOuter[3] = -1.0f; + m_clientMetadata = &m_clientMetadataBufMem; + m_forceDisableStreamOut = false; +#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 71 + m_state.ppUniformMaps = &m_pUniformMaps[0]; +#else + m_state.glState.ppUniformMaps = &m_pUniformMaps[0]; +#endif } static StrToMemberAddrArrayRef getAddrTable() { static std::vector addrTable = []() { std::vector addrTableInitializer; - INIT_STATE_MEMBER_NAME_TO_ADDR(SectionGraphicsState, topology, MemberTypeEnum, false); - INIT_STATE_MEMBER_NAME_TO_ADDR(SectionGraphicsState, provokingVertexMode, MemberTypeEnum, false); - INIT_STATE_MEMBER_NAME_TO_ADDR(SectionGraphicsState, patchControlPoints, MemberTypeInt, false); - INIT_STATE_MEMBER_NAME_TO_ADDR(SectionGraphicsState, deviceIndex, MemberTypeInt, false); - INIT_STATE_MEMBER_NAME_TO_ADDR(SectionGraphicsState, disableVertexReuse, MemberTypeInt, false); - INIT_STATE_MEMBER_NAME_TO_ADDR(SectionGraphicsState, depthClipEnable, MemberTypeInt, false); - INIT_STATE_MEMBER_NAME_TO_ADDR(SectionGraphicsState, rasterizerDiscardEnable, MemberTypeInt, false); - INIT_STATE_MEMBER_NAME_TO_ADDR(SectionGraphicsState, perSampleShading, MemberTypeInt, false); - INIT_STATE_MEMBER_NAME_TO_ADDR(SectionGraphicsState, numSamples, MemberTypeInt, false); - INIT_STATE_MEMBER_NAME_TO_ADDR(SectionGraphicsState, pixelShaderSamples, MemberTypeInt, false); - INIT_STATE_MEMBER_NAME_TO_ADDR(SectionGraphicsState, samplePatternIdx, MemberTypeInt, false); - INIT_STATE_MEMBER_NAME_TO_ADDR(SectionGraphicsState, dynamicSampleInfo, MemberTypeInt, false); - INIT_STATE_MEMBER_NAME_TO_ADDR(SectionGraphicsState, rasterStream, MemberTypeInt, false); - INIT_STATE_MEMBER_NAME_TO_ADDR(SectionGraphicsState, usrClipPlaneMask, MemberTypeInt, false); - 
INIT_STATE_MEMBER_NAME_TO_ADDR(SectionGraphicsState, alphaToCoverageEnable, MemberTypeInt, false); - INIT_STATE_MEMBER_NAME_TO_ADDR(SectionGraphicsState, dualSourceBlendEnable, MemberTypeInt, false); - INIT_STATE_MEMBER_NAME_TO_ADDR(SectionGraphicsState, dualSourceBlendDynamic, MemberTypeInt, false); - INIT_STATE_MEMBER_NAME_TO_ADDR(SectionGraphicsState, switchWinding, MemberTypeInt, false); - INIT_STATE_MEMBER_NAME_TO_ADDR(SectionGraphicsState, enableMultiView, MemberTypeInt, false); - INIT_MEMBER_NAME_TO_ADDR(SectionGraphicsState, m_options, MemberTypePipelineOption, true); - INIT_MEMBER_NAME_TO_ADDR(SectionGraphicsState, m_nggState, MemberTypeNggState, true); + INIT_STATE_SUB_MEMBER_NAME_TO_ADDR(SectionGraphicsState, iaState, topology, MemberTypeEnum, false); + INIT_STATE_SUB_MEMBER_NAME_TO_ADDR(SectionGraphicsState, iaState, patchControlPoints, MemberTypeInt, false); + INIT_STATE_SUB_MEMBER_NAME_TO_ADDR(SectionGraphicsState, iaState, deviceIndex, MemberTypeInt, false); + INIT_STATE_SUB_MEMBER_NAME_TO_ADDR(SectionGraphicsState, iaState, disableVertexReuse, MemberTypeBool, false); + INIT_STATE_SUB_MEMBER_NAME_TO_ADDR(SectionGraphicsState, iaState, switchWinding, MemberTypeBool, false); + INIT_STATE_SUB_MEMBER_NAME_TO_ADDR(SectionGraphicsState, iaState, enableMultiView, MemberTypeBool, false); + INIT_STATE_SUB_MEMBER_NAME_TO_ADDR(SectionGraphicsState, iaState, useVertexBufferDescArray, MemberTypeBool, + false); + INIT_MEMBER_ARRAY_NAME_TO_ADDR(SectionGraphicsState, m_tessLevelInner, MemberTypeFloat, 2, false); + INIT_MEMBER_ARRAY_NAME_TO_ADDR(SectionGraphicsState, m_tessLevelOuter, MemberTypeFloat, 4, false); + + INIT_STATE_SUB_MEMBER_NAME_TO_ADDR(SectionGraphicsState, vpState, depthClipEnable, MemberTypeBool, false); + + INIT_STATE_SUB_MEMBER_NAME_TO_ADDR(SectionGraphicsState, rsState, rasterizerDiscardEnable, MemberTypeBool, false); + INIT_STATE_SUB_MEMBER_NAME_TO_ADDR(SectionGraphicsState, rsState, innerCoverage, MemberTypeBool, false); + 
INIT_STATE_SUB_MEMBER_NAME_TO_ADDR(SectionGraphicsState, rsState, perSampleShading, MemberTypeBool, false); + INIT_MEMBER_NAME_TO_ADDR(SectionGraphicsState, m_usrClipPlaneMask, MemberTypeInt, false); + INIT_STATE_SUB_MEMBER_NAME_TO_ADDR(SectionGraphicsState, rsState, numSamples, MemberTypeInt, false); + INIT_STATE_SUB_MEMBER_NAME_TO_ADDR(SectionGraphicsState, rsState, pixelShaderSamples, MemberTypeInt, false); + INIT_STATE_SUB_MEMBER_NAME_TO_ADDR(SectionGraphicsState, rsState, samplePatternIdx, MemberTypeInt, false); + INIT_STATE_SUB_MEMBER_NAME_TO_ADDR(SectionGraphicsState, rsState, dynamicSampleInfo, MemberTypeBool, false); + INIT_STATE_SUB_MEMBER_NAME_TO_ADDR(SectionGraphicsState, rsState, rasterStream, MemberTypeInt, false); + INIT_STATE_SUB_MEMBER_NAME_TO_ADDR(SectionGraphicsState, rsState, provokingVertexMode, MemberTypeEnum, false); + + INIT_STATE_SUB_MEMBER_NAME_TO_ADDR(SectionGraphicsState, cbState, alphaToCoverageEnable, MemberTypeBool, false); + INIT_STATE_SUB_MEMBER_NAME_TO_ADDR(SectionGraphicsState, cbState, dualSourceBlendEnable, MemberTypeBool, false); + INIT_STATE_SUB_MEMBER_NAME_TO_ADDR(SectionGraphicsState, cbState, dualSourceBlendDynamic, MemberTypeBool, false); INIT_MEMBER_ARRAY_NAME_TO_ADDR(SectionGraphicsState, m_colorBuffer, MemberTypeColorBufferItem, Vkgc::MaxColorTargets, true); + INIT_MEMBER_NAME_TO_ADDR(SectionGraphicsState, m_nggState, MemberTypeNggState, true); + INIT_MEMBER_NAME_TO_ADDR(SectionGraphicsState, m_options, MemberTypePipelineOption, true); + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionGraphicsState, unlinked, MemberTypeBool, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionGraphicsState, dynamicVertexStride, MemberTypeBool, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionGraphicsState, enableUberFetchShader, MemberTypeBool, false); - INIT_STATE_MEMBER_NAME_TO_ADDR(SectionGraphicsState, enableEarlyCompile, MemberTypeBool, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionGraphicsState, enableColorExportShader, MemberTypeBool, false); + 
INIT_STATE_MEMBER_NAME_TO_ADDR(SectionGraphicsState, enableEarlyCompile, MemberTypeBool, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionGraphicsState, useSoftwareVertexBufferDescriptors, MemberTypeBool, false); - INIT_STATE_MEMBER_NAME_TO_ADDR(SectionGraphicsState, vbAddressLowBitsKnown, MemberTypeBool, false); INIT_MEMBER_NAME_TO_ADDR(SectionGraphicsState, m_shaderLibrary, MemberTypeString, false); INIT_MEMBER_NAME_TO_ADDR(SectionGraphicsState, m_rtState, MemberTypeRtState, true); - INIT_MEMBER_ARRAY_NAME_TO_ADDR(SectionGraphicsState, tessLevelInner, MemberTypeFloat, 2, false); - INIT_MEMBER_ARRAY_NAME_TO_ADDR(SectionGraphicsState, tessLevelOuter, MemberTypeFloat, 4, false); - + INIT_MEMBER_NAME_TO_ADDR(SectionGraphicsState, m_clientMetadata, MemberTypeU8Array, false); + INIT_MEMBER_ARRAY_NAME_TO_ADDR(SectionGraphicsState, m_uniformConstantMaps, MemberTypeUniformConstantMap, + Vkgc::ShaderStageGfxCount, true); +#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 71 + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionGraphicsState, originUpperLeft, MemberTypeBool, false); + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionGraphicsState, vbAddressLowBitsKnown, MemberTypeBool, false); +#else + INIT_STATE_SUB_MEMBER_NAME_TO_ADDR(SectionGraphicsState, glState, originUpperLeft, MemberTypeBool, false); + INIT_STATE_SUB_MEMBER_NAME_TO_ADDR(SectionGraphicsState, glState, vbAddressLowBitsKnown, MemberTypeBool, false); +#endif + INIT_MEMBER_NAME_TO_ADDR(SectionGraphicsState, m_forceDisableStreamOut, MemberTypeBool, false); +#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 70 + INIT_STATE_SUB_MEMBER_NAME_TO_ADDR(SectionGraphicsState, apiXfbOutData, forceEnablePrimStats, MemberTypeBool, + false); +#endif + INIT_MEMBER_DYNARRAY_NAME_TO_ADDR(SectionGraphicsState, m_xfbOutInfo, MemberTypeXfbOutInfo, true); return addrTableInitializer; }(); return {addrTable.data(), addrTable.size()}; } void getSubState(const std::string &docFilename, SubState &state, std::string *errorMsg) { - for (unsigned i = 0; i < 
Vkgc::MaxColorTargets; ++i) - m_colorBuffer[i].getSubState(m_state.colorBuffer[i]); + for (unsigned i = 0; i < Vkgc::MaxColorTargets; ++i) { + ColorBuffer colorBuffer = {}; + m_colorBuffer[i].getSubState(colorBuffer); + m_state.cbState.target[i].blendEnable = colorBuffer.blendEnable; + m_state.cbState.target[i].blendSrcAlphaToColor = colorBuffer.blendSrcAlphaToColor; + m_state.cbState.target[i].channelWriteMask = colorBuffer.channelWriteMask; + m_state.cbState.target[i].format = colorBuffer.format; + } m_options.getSubState(m_state.options); m_nggState.getSubState(m_state.nggState); - memcpy(m_state.tessLevelInner, tessLevelInner, sizeof(tessLevelInner)); - memcpy(m_state.tessLevelOuter, tessLevelOuter, sizeof(tessLevelOuter)); +#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 71 + auto pGlState = &m_state; +#else + auto pGlState = &m_state.glState; +#endif + for (unsigned i = 0; i < Vkgc::ShaderStageGfxCount; i++) { + m_uniformConstantMaps[i].getSubState(m_uniformMaps[i]); + if (m_uniformMaps[i].numUniformConstants > 0) { + pGlState->ppUniformMaps[pGlState->numUniformConstantMaps++] = &m_uniformMaps[i]; + } + } + + pGlState->apiXfbOutData.forceDisableStreamOut = m_forceDisableStreamOut; + if (m_xfbOutInfo.size() > 0) { + pGlState->apiXfbOutData.numXfbOutInfo = static_cast(m_xfbOutInfo.size()); + m_xfbOutInfoData.resize(pGlState->apiXfbOutData.numXfbOutInfo); + for (unsigned i = 0; i < pGlState->apiXfbOutData.numXfbOutInfo; ++i) + m_xfbOutInfo[i].getSubState(m_xfbOutInfoData[i]); + pGlState->apiXfbOutData.pXfbOutInfos = &m_xfbOutInfoData[0]; + } + + if (m_clientMetadataBufMem.size() > 0) { + m_state.clientMetadataSize = m_clientMetadataBufMem.size(); + m_state.pClientMetadata = m_clientMetadataBufMem.data(); + } + + m_state.rsState.usrClipPlaneMask = m_usrClipPlaneMask; + + if ((m_tessLevelInner[0] != -1.0f) || (m_tessLevelOuter[0] != -1.0f)) { + m_tessLevel.inner[0] = m_tessLevelInner[0]; + m_tessLevel.inner[1] = m_tessLevelInner[1]; + m_tessLevel.outer[0] = 
m_tessLevelOuter[0]; + m_tessLevel.outer[1] = m_tessLevelOuter[1]; + m_tessLevel.outer[2] = m_tessLevelOuter[2]; + m_tessLevel.outer[3] = m_tessLevelOuter[3]; + m_state.iaState.tessLevel = &m_tessLevel; + } + state = m_state; #if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 62 @@ -870,31 +957,44 @@ class SectionGraphicsState : public Section { SubState m_state; SectionColorBuffer m_colorBuffer[Vkgc::MaxColorTargets]; // Color buffer SectionPipelineOption m_options; + Vkgc::UniformConstantMap *m_pUniformMaps[Vkgc::ShaderStageGfxCount]; + Vkgc::UniformConstantMap m_uniformMaps[Vkgc::ShaderStageGfxCount]; + SectionUniformConstantMap m_uniformConstantMaps[Vkgc::ShaderStageGfxCount]; std::string m_shaderLibrary; std::vector m_shaderLibraryBytes; + std::vector *m_clientMetadata; + std::vector m_clientMetadataBufMem; SectionRtState m_rtState; - float tessLevelInner[2]; - float tessLevelOuter[4]; + bool m_forceDisableStreamOut; + float m_tessLevelInner[2]; + float m_tessLevelOuter[4]; + Vkgc::TessellationLevel m_tessLevel; + std::vector m_xfbOutInfo; + std::vector m_xfbOutInfoData; + unsigned m_usrClipPlaneMask; }; // ===================================================================================================================== // Represents the section compute state class SectionComputeState : public Section { public: - typedef ComputePipelineState SubState; + typedef Vkgc::ComputePipelineBuildInfo SubState; SectionComputeState() : Section(getAddrTable(), SectionTypeComputeState, nullptr) { memset(&m_state, 0, sizeof(m_state)); + m_clientMetadata = &m_clientMetadataBufMem; } static StrToMemberAddrArrayRef getAddrTable() { static std::vector addrTable = []() { std::vector addrTableInitializer; INIT_STATE_MEMBER_NAME_TO_ADDR(SectionComputeState, deviceIndex, MemberTypeInt, false); + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionComputeState, unlinked, MemberTypeBool, false); INIT_MEMBER_NAME_TO_ADDR(SectionComputeState, m_options, MemberTypePipelineOption, true); 
INIT_MEMBER_NAME_TO_ADDR(SectionComputeState, m_shaderLibrary, MemberTypeString, false); INIT_MEMBER_NAME_TO_ADDR(SectionComputeState, m_rtState, MemberTypeRtState, true); - + INIT_MEMBER_NAME_TO_ADDR(SectionComputeState, m_clientMetadata, MemberTypeU8Array, false); + INIT_MEMBER_NAME_TO_ADDR(SectionComputeState, m_uniformConstantMap, MemberTypeUniformConstantMap, true); return addrTableInitializer; }(); return {addrTable.data(), addrTable.size()}; @@ -915,6 +1015,16 @@ class SectionComputeState : public Section { } #endif m_rtState.getSubState(docFilename, state.rtState, errorMsg); + + m_uniformConstantMap.getSubState(m_uniformMap); + if (m_uniformMap.numUniformConstants > 0) { + state.pUniformMap = &m_uniformMap; + } + + if (m_clientMetadataBufMem.size() > 0) { + state.clientMetadataSize = m_clientMetadataBufMem.size(); + state.pClientMetadata = m_clientMetadataBufMem.data(); + } } SubState &getSubStateRef() { return m_state; } @@ -924,36 +1034,46 @@ class SectionComputeState : public Section { std::string m_shaderLibrary; std::vector m_shaderLibraryBytes; SectionRtState m_rtState; + Vkgc::UniformConstantMap m_uniformMap; + SectionUniformConstantMap m_uniformConstantMap; + std::vector *m_clientMetadata; + std::vector m_clientMetadataBufMem; }; // ===================================================================================================================== // Represents the section ray tracing state class SectionRayTracingState : public Section { public: - typedef RayTracingPipelineState SubState; + typedef Vkgc::RayTracingPipelineBuildInfo SubState; SectionRayTracingState() : Section(getAddrTable(), SectionTypeComputeState, nullptr) { memset(&m_state, 0, sizeof(m_state)); + m_clientMetadata = &m_clientMetadataBufMem; } static StrToMemberAddrArrayRef getAddrTable() { static std::vector addrTable = []() { std::vector addrTableInitializer; INIT_STATE_MEMBER_NAME_TO_ADDR(SectionRayTracingState, deviceIndex, MemberTypeInt, false); - 
INIT_MEMBER_NAME_TO_ADDR(SectionRayTracingState, m_options, MemberTypePipelineOption, true); + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionRayTracingState, deviceCount, MemberTypeInt, false); + INIT_MEMBER_DYNARRAY_NAME_TO_ADDR(SectionRayTracingState, m_groups, MemberTypeShaderGroup, true); + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionRayTracingState, libraryMode, MemberTypeInt, false); #if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 62 INIT_MEMBER_NAME_TO_ADDR(SectionRayTracingState, m_shaderTraceRay, MemberTypeString, false); #endif + INIT_MEMBER_NAME_TO_ADDR(SectionRayTracingState, m_options, MemberTypePipelineOption, true); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionRayTracingState, maxRecursionDepth, MemberTypeInt, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionRayTracingState, indirectStageMask, MemberTypeInt, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionRayTracingState, mode, MemberTypeInt, false); INIT_MEMBER_NAME_TO_ADDR(SectionRayTracingState, m_rtState, MemberTypeRtState, true); - INIT_STATE_MEMBER_NAME_TO_ADDR(SectionRayTracingState, payloadSizeMaxInLib, MemberTypeInt, false); - INIT_STATE_MEMBER_NAME_TO_ADDR(SectionRayTracingState, attributeSizeMaxInLib, MemberTypeInt, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionRayTracingState, hasPipelineLibrary, MemberTypeBool, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionRayTracingState, pipelineLibStageMask, MemberTypeInt, false); - INIT_STATE_MEMBER_NAME_TO_ADDR(SectionRayTracingState, gpurtFeatureFlags, MemberTypeInt, false); + + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionRayTracingState, payloadSizeMaxInLib, MemberTypeInt, false); + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionRayTracingState, attributeSizeMaxInLib, MemberTypeInt, false); + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionRayTracingState, isReplay, MemberTypeBool, false); + INIT_MEMBER_NAME_TO_ADDR(SectionRayTracingState, m_clientMetadata, MemberTypeU8Array, false); return addrTableInitializer; }(); return {addrTable.data(), addrTable.size()}; @@ -978,6 +1098,10 @@ class 
SectionRayTracingState : public Section { } #endif m_rtState.getSubState(docFilename, m_state.rtState, errorMsg); + if (m_clientMetadataBufMem.size() > 0) { + m_state.clientMetadataSize = m_clientMetadataBufMem.size(); + m_state.pClientMetadata = m_clientMetadataBufMem.data(); + } state = m_state; }; SubState &getSubStateRef() { return m_state; }; @@ -992,80 +1116,8 @@ class SectionRayTracingState : public Section { std::vector m_groups; std::vector m_vkShaderGroups; std::vector m_traceRayBinary; -}; - -// ===================================================================================================================== -// Represents the sub section XfbOutInfo -class SectionXfbOutInfo : public Section { -public: - typedef Vkgc::XfbOutInfo SubState; - - SectionXfbOutInfo() : Section(getAddrTable(), SectionTypeUnset, "XfbOutInfo") { - memset(&m_state, 0, sizeof(m_state)); - } - - void getSubState(SubState &state) { state = m_state; }; - SubState &getSubStateRef() { return m_state; }; - -private: - static StrToMemberAddrArrayRef getAddrTable() { - static std::vector addrTable = []() { - std::vector addrTableInitializer; - INIT_STATE_MEMBER_NAME_TO_ADDR(SectionXfbOutInfo, isBuiltIn, MemberTypeBool, false); - INIT_STATE_MEMBER_NAME_TO_ADDR(SectionXfbOutInfo, location, MemberTypeInt, false); - INIT_STATE_MEMBER_NAME_TO_ADDR(SectionXfbOutInfo, component, MemberTypeInt, false); - INIT_STATE_MEMBER_NAME_TO_ADDR(SectionXfbOutInfo, xfbBuffer, MemberTypeInt, false); - INIT_STATE_MEMBER_NAME_TO_ADDR(SectionXfbOutInfo, xfbOffset, MemberTypeInt, false); - INIT_STATE_MEMBER_NAME_TO_ADDR(SectionXfbOutInfo, xfbStride, MemberTypeInt, false); - INIT_STATE_MEMBER_NAME_TO_ADDR(SectionXfbOutInfo, streamId, MemberTypeInt, false); - return addrTableInitializer; - }(); - return {addrTable.data(), addrTable.size()}; - } - - SubState m_state; -}; - -// ===================================================================================================================== -// Represents the 
sub section ApiXfb -class SectionApiXfbOutput : public Section { -public: - typedef Vkgc::ApiXfbOutData SubState; - SectionApiXfbOutput() : Section(getAddrTable(), SectionTypeApiXfbOutput, "ApiXfbOutput") { - memset(&m_state, 0, sizeof(m_state)); - } - - void getSubState(SubState &state) { - memset(&state, 0, sizeof(SubState)); - state = m_state; - if (m_xfbOutInfo.size() > 0) { - state.numXfbOutInfo = static_cast(m_xfbOutInfo.size()); - m_xfbOutInfoData.resize(state.numXfbOutInfo); - for (unsigned i = 0; i < state.numXfbOutInfo; ++i) - m_xfbOutInfo[i].getSubState(m_xfbOutInfoData[i]); - state.pXfbOutInfos = &m_xfbOutInfoData[0]; - } - } - - SubState &getSubStateRef() { return m_state; }; - -private: - static StrToMemberAddrArrayRef getAddrTable() { - static std::vector addrTable = []() { - std::vector addrTableInitializer; - INIT_STATE_MEMBER_NAME_TO_ADDR(SectionApiXfbOutput, forceDisableStreamOut, MemberTypeBool, false); -#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 70 - INIT_STATE_MEMBER_NAME_TO_ADDR(SectionApiXfbOutput, forceEnablePrimStats, MemberTypeBool, false); -#endif - INIT_MEMBER_DYNARRAY_NAME_TO_ADDR(SectionApiXfbOutput, m_xfbOutInfo, MemberTypeXfbOutInfo, true); - return addrTableInitializer; - }(); - return {addrTable.data(), addrTable.size()}; - } - - SubState m_state; - std::vector m_xfbOutInfo; // Contains the info of the capture output - std::vector m_xfbOutInfoData; + std::vector *m_clientMetadata; + std::vector m_clientMetadataBufMem; }; } // namespace Vfx diff --git a/util/extensions.txt b/util/extensions.txt index 97eef1c0b3..aeb04d5f8c 100644 --- a/util/extensions.txt +++ b/util/extensions.txt @@ -49,7 +49,5 @@ SPV_NV_shader_atomic_float SPV_NV_compute_shader_derivatives SPV_KHR_maximal_reconvergence SPV_KHR_expect_assume -#if VKI_KHR_SHADER_QUAD_CONTROL SPV_KHR_shader_quad_control -#endif SPV_KHR_subgroup_rotate diff --git a/util/gpurtshim/GpurtShim.cpp b/util/gpurtshim/GpurtShim.cpp index 30964e08d7..e08a889910 100644 --- 
a/util/gpurtshim/GpurtShim.cpp +++ b/util/gpurtshim/GpurtShim.cpp @@ -99,6 +99,7 @@ void gpurt::getFuncTable(RtIpVersion rtIpVersion, GpurtFuncTable &table) { unmangleDxilName(table.pFunc[RT_ENTRY_TRACE_RAY_INLINE], gpurtTable.rayQuery.pTraceRayInline); unmangleDxilName(table.pFunc[RT_ENTRY_TRACE_RAY_HIT_TOKEN], gpurtTable.traceRay.pTraceRayUsingHitToken); unmangleDxilName(table.pFunc[RT_ENTRY_RAY_QUERY_PROCEED], gpurtTable.rayQuery.pProceed); + unmangleDxilName(table.pFunc[RT_ENTRY_GET_INSTANCE_NODE], gpurtTable.rayQuery.pGet64BitInstanceNodePtr); unmangleDxilName(table.pFunc[RT_ENTRY_INSTANCE_INDEX], gpurtTable.intrinsic.pGetInstanceIndex); unmangleDxilName(table.pFunc[RT_ENTRY_INSTANCE_ID], gpurtTable.intrinsic.pGetInstanceID); unmangleDxilName(table.pFunc[RT_ENTRY_OBJECT_TO_WORLD_TRANSFORM], gpurtTable.intrinsic.pGetObjectToWorldTransform); diff --git a/version/CMakeLists.txt b/version/CMakeLists.txt index f29c12e7cf..0342598380 100644 --- a/version/CMakeLists.txt +++ b/version/CMakeLists.txt @@ -47,10 +47,10 @@ target_compile_definitions(llpc_version INTERFACE CHIP_HDR_NAVI22 CHIP_HDR_NAVI23 CHIP_HDR_NAVI24 -#if LLPC_BUILD_NAVI31 +#if VKI_BUILD_NAVI31 CHIP_HDR_NAVI31 #endif -#if LLPC_BUILD_NAVI33 +#if VKI_BUILD_NAVI33 CHIP_HDR_NAVI33 #endif CHIP_HDR_RENOIR) @@ -80,7 +80,7 @@ option(LLPC_BUILD_NAVI12 "LLPC support for NAVI12?" ON) if (LLPC_BUILD_NAVI12) target_compile_definitions(llpc_version INTERFACE LLPC_BUILD_NAVI12 CHIP_HDR_NAVI12) endif() -#if LLPC_BUILD_NAVI32 +#if VKI_BUILD_NAVI32 option(LLPC_BUILD_NAVI32 "LLPC support for NAVI32?" ON) if (LLPC_BUILD_NAVI32) target_compile_definitions(llpc_version INTERFACE LLPC_BUILD_NAVI32 CHIP_HDR_NAVI32) @@ -98,7 +98,7 @@ option(LLPC_BUILD_MENDOCINO "LLPC support for MENDOCINO?" ON) if (LLPC_BUILD_MENDOCINO) target_compile_definitions(llpc_version INTERFACE LLPC_BUILD_MENDOCINO CHIP_HDR_MENDOCINO) endif() -#if LLPC_BUILD_PHOENIX1 +#if VKI_BUILD_PHOENIX1 option(LLPC_BUILD_PHOENIX1 "LLPC support for PHOENIX1?" 
ON) if (LLPC_BUILD_PHOENIX1) target_compile_definitions(llpc_version INTERFACE LLPC_BUILD_PHOENIX1 CHIP_HDR_PHOENIX1) diff --git a/version/include/llpc/GpurtIntrinsics.h b/version/include/llpc/GpurtIntrinsics.h index 16b05c7334..90072cc393 100644 --- a/version/include/llpc/GpurtIntrinsics.h +++ b/version/include/llpc/GpurtIntrinsics.h @@ -62,6 +62,8 @@ #endif #endif +#define CONTINUATIONS_LGC_STACK_LOWERING 1 + //===================================================================================================================== // Continuation intrinsics // @@ -103,9 +105,9 @@ // Enqueue // ------- // Enqueue just jumps to the function at the given address. Enqueue is noreturn, and following code is unreachable. -// _AmdEnqueue*(uint64_t addr, uint32_t csp, ...) +// _AmdEnqueue*(uint64_t addr, ...) #define DECLARE_ENQUEUE(Suffix, ...) GPURT_DECL \ - void _AmdEnqueue##Suffix(uint64_t addr, uint32_t csp, __VA_ARGS__) DUMMY_VOID_FUNC + void _AmdEnqueue##Suffix(uint64_t addr, __VA_ARGS__) DUMMY_VOID_FUNC // // WaitEnqueue // ----------- @@ -113,7 +115,7 @@ // Generic function arguments start with the third argument. // _AmdWaitEnqueue*(uint64_t addr, uint64_t waitMask, uint32_t csp, ...) #define DECLARE_WAIT_ENQUEUE(Suffix, ...) GPURT_DECL \ - void _AmdWaitEnqueue##Suffix(uint64_t addr, uint64_t waitMask, uint32_t csp, __VA_ARGS__) DUMMY_VOID_FUNC + void _AmdWaitEnqueue##Suffix(uint64_t addr, uint64_t waitMask, __VA_ARGS__) DUMMY_VOID_FUNC // // Complete // -------- @@ -164,7 +166,7 @@ GPURT_DECL DXILShaderKind _AmdGetShaderKind() DUMMY_GENERIC_FUNC(DXILShaderKind: //===================================================================================================================== // ContStackAlloc // Allocate space on the continuation stack. -// Arguments are the current stack pointer and the size of the allocation. +// Argument is the size of the allocation. // Returns the address of the allocation. 
// // This is equivalent to @@ -173,7 +175,7 @@ GPURT_DECL DXILShaderKind _AmdGetShaderKind() DUMMY_GENERIC_FUNC(DXILShaderKind: // // In addition, it tells the compiler and driver about this allocation, so they can reserve enough memory for the // stack. -GPURT_DECL uint32_t _AmdContStackAlloc(GPURT_INOUT uint32_t csp, uint32_t byteSize) DUMMY_GENERIC_FUNC(0) +GPURT_DECL uint32_t _AmdContStackAlloc(uint32_t byteSize) DUMMY_GENERIC_FUNC(0) //===================================================================================================================== // Free the current continuation stack @@ -192,6 +194,11 @@ GPURT_DECL uint32_t _AmdContStackGetPtr() DUMMY_GENERIC_FUNC(0) #define DECLARE_CONT_STACK_LOAD(Suffix, ReturnTy) GPURT_DECL \ ReturnTy _AmdContStackLoad##Suffix(uint32_t addr) DUMMY_GENERIC_FUNC((ReturnTy)0) +//===================================================================================================================== +// Load data from a given continuation stack address, mark the load as last use +#define DECLARE_CONT_STACK_LOAD_LAST_USE(Suffix, ReturnTy) GPURT_DECL \ + ReturnTy _AmdContStackLoadLastUse##Suffix(uint32_t addr) DUMMY_GENERIC_FUNC((ReturnTy)0) + //===================================================================================================================== // Store data to a given continuation stack address #define DECLARE_CONT_STACK_STORE(Suffix, ...) GPURT_DECL \ diff --git a/version/include/llpcVersion.h.in b/version/include/llpcVersion.h.in index 6a027da047..916b1090b3 100644 --- a/version/include/llpcVersion.h.in +++ b/version/include/llpcVersion.h.in @@ -37,6 +37,8 @@ // %Version History // | %Version | Change Description | // | -------- | ----------------------------------------------------------------------------------------------------- | +// | 71.1 | Add GraphNodeName to the GraphicsPipelineBuildInfo | +// | 71.0 | Add glState to GraphicsPipelineBuildInfo. And move OGL status to glState. 
| // | 70.5 | Add vbAddressLowBitsKnown to Options. Add vbAddrLowBits to VertexInputDescription. | // | Add vbAddressLowBitsKnown and vbAddressLowBits to GraphicsPipelineBuildInfo. | // | Add columnCount to ResourceNodeData. | @@ -173,7 +175,7 @@ #define LLPC_INTERFACE_MAJOR_VERSION 70 /// LLPC minor interface version. -#define LLPC_INTERFACE_MINOR_VERSION 5 +#define LLPC_INTERFACE_MINOR_VERSION 1 /// The client's LLPC major interface version #ifndef LLPC_CLIENT_INTERFACE_MAJOR_VERSION