From 5579b5e4cafe9804def5dc8389b731846f2a28da Mon Sep 17 00:00:00 2001 From: qiaojbao Date: Thu, 22 Feb 2024 16:44:34 +0800 Subject: [PATCH] Update llpc from commit a1369c61 --- compilerutils/lib/CompilerUtils.cpp | 20 +- include/khronos/spirv/spirv.hpp | 43 ++- include/vkgcDefs.h | 18 +- lgc/CMakeLists.txt | 2 +- lgc/builder/BuilderRecorder.cpp | 23 -- lgc/builder/BuilderRecorder.h | 2 - lgc/builder/BuilderReplayer.cpp | 17 +- lgc/builder/DescBuilder.cpp | 12 +- lgc/builder/ImageBuilder.cpp | 49 +-- lgc/builder/InOutBuilder.cpp | 64 ++-- lgc/builder/SubgroupBuilder.cpp | 27 -- lgc/builder/YCbCrAddressHandler.cpp | 23 -- lgc/builder/YCbCrConverter.cpp | 11 - lgc/elfLinker/ColorExportShader.cpp | 35 +- lgc/elfLinker/ColorExportShader.h | 2 + lgc/elfLinker/ElfLinker.cpp | 18 +- lgc/elfLinker/FetchShader.cpp | 346 ------------------ lgc/elfLinker/FetchShader.h | 101 ----- lgc/elfLinker/GlueShader.cpp | 9 - lgc/elfLinker/GlueShader.h | 4 - lgc/elfLinker/NullFragmentShader.cpp | 15 +- lgc/elfLinker/NullFragmentShader.h | 1 - lgc/include/lgc/builder/BuilderImpl.h | 15 +- lgc/include/lgc/builder/BuilderReplayer.h | 2 - lgc/include/lgc/patch/FragColorExport.h | 25 +- .../lgc/patch/LowerCooperativeMatrix.h | 34 +- lgc/include/lgc/patch/LowerGpuRt.h | 2 + lgc/include/lgc/patch/LowerSubgroupOps.h | 67 ++++ lgc/include/lgc/patch/PatchCheckShaderCache.h | 2 - lgc/include/lgc/patch/PatchCopyShader.h | 2 - lgc/include/lgc/patch/PatchEntryPointMutate.h | 2 - lgc/include/lgc/patch/PatchImageDerivatives.h | 2 - lgc/include/lgc/patch/PatchImageOpCollect.h | 2 - .../lgc/patch/PatchInOutImportExport.h | 3 - .../patch/PatchInitializeWorkgroupMemory.h | 2 - lgc/include/lgc/patch/PatchInvariantLoads.h | 2 - lgc/include/lgc/patch/PatchLlvmIrInclusion.h | 2 - lgc/include/lgc/patch/PatchLoadScalarizer.h | 2 - lgc/include/lgc/patch/PatchLoopMetadata.h | 2 - lgc/include/lgc/patch/PatchPeepholeOpt.h | 2 - .../lgc/patch/PatchPreparePipelineAbi.h | 3 - lgc/include/lgc/patch/PatchResourceCollect.h | 2 - .../lgc/patch/PatchSetupTargetFeatures.h | 2 - lgc/include/lgc/patch/PatchWorkarounds.h | 2 - lgc/include/lgc/patch/VertexFetch.h | 2 - lgc/include/lgc/state/AbiUnlinked.h | 1 - lgc/include/lgc/state/PalMetadata.h | 28 +- lgc/include/lgc/state/PipelineShaders.h | 1 - lgc/include/lgc/state/PipelineState.h | 3 +- lgc/include/lgc/state/TargetInfo.h | 11 - lgc/interface/lgc/Builder.h | 15 +- lgc/interface/lgc/ElfLinker.h | 3 +- lgc/interface/lgc/LgcDialect.td | 161 ++++++-- lgc/patch/FragColorExport.cpp | 189 +++++----- lgc/patch/LowerCooperativeMatrix.cpp | 258 ++++++++++++- lgc/patch/LowerGpuRt.cpp | 32 ++ lgc/patch/LowerSubgroupOps.cpp | 149 ++++++++ lgc/patch/NggPrimShader.cpp | 45 --- lgc/patch/PassRegistry.inc | 1 + lgc/patch/Patch.cpp | 2 + lgc/patch/PatchCheckShaderCache.cpp | 17 +- lgc/patch/PatchCopyShader.cpp | 16 +- lgc/patch/PatchEntryPointMutate.cpp | 22 +- lgc/patch/PatchImageDerivatives.cpp | 31 +- lgc/patch/PatchImageOpCollect.cpp | 13 +- lgc/patch/PatchInOutImportExport.cpp | 66 +--- lgc/patch/PatchInitializeWorkgroupMemory.cpp | 19 +- lgc/patch/PatchInvariantLoads.cpp | 61 ++- lgc/patch/PatchLlvmIrInclusion.cpp | 17 +- lgc/patch/PatchLoadScalarizer.cpp | 15 +- lgc/patch/PatchLoopMetadata.cpp | 17 +- lgc/patch/PatchNullFragShader.cpp | 16 +- lgc/patch/PatchNullFragShader.h | 2 - lgc/patch/PatchPeepholeOpt.cpp | 13 +- lgc/patch/PatchPreparePipelineAbi.cpp | 42 +-- lgc/patch/PatchResourceCollect.cpp | 50 +-- lgc/patch/PatchSetupTargetFeatures.cpp | 12 +- lgc/patch/PatchWorkarounds.cpp | 13 +- 
lgc/patch/ShaderInputs.cpp | 6 - lgc/patch/ShaderMerger.cpp | 36 +- lgc/patch/ShaderMerger.h | 1 - lgc/patch/TcsPassthroughShader.cpp | 4 + lgc/patch/VertexFetch.cpp | 119 ++---- lgc/state/PalMetadata.cpp | 51 +-- lgc/state/PipelineShaders.cpp | 14 +- lgc/state/PipelineState.cpp | 14 +- lgc/state/TargetInfo.cpp | 66 +--- lgc/test/FetchShaderSingleInput.lgc | 151 -------- lgc/test/IntVectorVertexInput.lgc | 106 ------ lgc/test/PhiWithArgument.lgc | 34 +- lgc/test/TaskShaderOps.lgc | 4 +- lgc/test/UnlinkedTessFetches.lgc | 69 ---- lgc/test/UnlinkedVsGsInputs.lgc | 80 ---- lgc/util/GfxRegHandler.cpp | 32 -- llpc/CMakeLists.txt | 12 +- llpc/context/llpcCompiler.cpp | 174 ++++----- llpc/context/llpcCompiler.h | 1 - llpc/context/llpcComputeContext.cpp | 1 + llpc/context/llpcContext.cpp | 78 ++++ llpc/context/llpcContext.h | 16 +- llpc/context/llpcGraphicsContext.cpp | 6 +- llpc/context/llpcPipelineContext.cpp | 35 +- llpc/context/llpcPipelineContext.h | 7 +- llpc/context/llpcRayTracingContext.cpp | 1 + llpc/docs/amdllpc.md | 1 - llpc/lower/PrepareContinuations.cpp | 80 ++++ llpc/lower/PrepareContinuations.h | 44 +++ llpc/lower/llpcSpirvLower.cpp | 14 +- llpc/lower/llpcSpirvLower.h | 12 +- llpc/lower/llpcSpirvLowerAccessChain.cpp | 11 +- llpc/lower/llpcSpirvLowerAccessChain.h | 2 - llpc/lower/llpcSpirvLowerCfgMerges.cpp | 49 ++- llpc/lower/llpcSpirvLowerCfgMerges.h | 1 - llpc/lower/llpcSpirvLowerGlobal.cpp | 13 +- llpc/lower/llpcSpirvLowerGlobal.h | 1 - llpc/lower/llpcSpirvLowerInstMetaRemove.cpp | 12 +- llpc/lower/llpcSpirvLowerInstMetaRemove.h | 1 - llpc/lower/llpcSpirvLowerMath.cpp | 53 +-- llpc/lower/llpcSpirvLowerMath.h | 7 - llpc/lower/llpcSpirvLowerMemoryOp.cpp | 11 +- llpc/lower/llpcSpirvLowerMemoryOp.h | 1 - llpc/lower/llpcSpirvLowerRayQuery.cpp | 11 +- llpc/lower/llpcSpirvLowerRayQuery.h | 1 - .../llpcSpirvLowerRayQueryPostInline.cpp | 11 +- llpc/lower/llpcSpirvLowerRayQueryPostInline.h | 1 - llpc/lower/llpcSpirvLowerRayTracing.cpp | 1 + llpc/lower/llpcSpirvLowerTerminator.cpp | 13 +- llpc/lower/llpcSpirvLowerTerminator.h | 1 - llpc/lower/llpcSpirvLowerTranslator.cpp | 15 +- llpc/lower/llpcSpirvLowerTranslator.h | 5 +- llpc/lower/llpcSpirvProcessGpuRtLibrary.cpp | 48 +++ llpc/lower/llpcSpirvProcessGpuRtLibrary.h | 2 + ...Array_Load_With_Array_Load_Result_lit.frag | 2 +- .../core/OOB_Check_Dependent_Load_lit.frag | 2 +- .../core/OOB_Check_Load_Array_Loop_lit.frag | 2 +- .../core/OOB_Check_Load_Array_lit.frag | 2 +- .../OOB_Check_Load_Array_with_Struct_lit.frag | 2 +- .../OOB_Check_Load_Matrix_Vector_lit.frag | 2 +- .../core/OOB_Check_Load_Matrix_lit.frag | 2 +- .../OOB_Check_Load_Nested_Struct_lit.frag | 2 +- .../core/OOB_Check_Load_Struct_lit.frag | 2 +- .../core/OOB_Check_Load_Vector_lit.frag | 2 +- .../core/OOB_Check_Multiple_Load_lit.frag | 2 +- .../core/OOB_Check_Optimization_lit.frag | 2 +- .../core/OOB_Check_Store_Array_lit.frag | 2 +- .../core/OOB_Check_Store_Struct_lit.frag | 2 +- .../core/OpFOrdEqual_TestVec3_lit.frag | 2 +- .../core/OpFOrdNotEqual_TestVec3_lit.frag | 2 +- .../shaderdb/core/OpIEqual_TestIvec2_lit.frag | 2 +- .../core/OpINotEqual_TestIvec2_lit.frag | 2 +- .../OpLogicalNotEqual_TestGeneral_lit.frag | 4 +- llpc/test/shaderdb/core/OpPtrEqualTest.spvasm | 6 +- ...ssThanEqual_TestSignedAndUnsigned_lit.frag | 2 +- ...OpSLessThan_TestSignedAndUnsigned_lit.frag | 2 +- .../ExtShaderInt64_TestRelationalOp_lit.frag | 6 +- .../ExtShaderVote_TestGeneral_lit.frag | 2 +- .../OpExtInst_TestFmaDouble_lit.frag | 10 +- .../OpExtInst_TestFmaFloat_lit.frag | 10 +- 
.../OpExtInst_TestMixSelectDouble_lit.frag | 10 +- .../OpExtInst_TestMixSelectFloat_lit.frag | 10 +- .../OpExtInst_TestMixSelectInt_lit.frag | 10 +- .../OpExtInst_TestMixSelectUint_lit.frag | 10 +- .../PipelineVsFs_TestFetchSingleInput.pipe | 278 -------------- .../general/PipelineVsFs_TestUberShader.pipe | 2 +- .../shaderdb/general/UndefVertexOutput.spvasm | 40 +- .../gfx11/TessFactorStoreWithOpt.pipe | 2 +- .../PipelineVsFs_ConstantData_Vs2Fs1.pipe | 6 +- .../ObjPushConst_TestNestedStruct_lit.vert | 2 +- .../ObjPushConst_TestSpillToMemory_lit.vert | 2 +- .../object/ObjPushConstant_TestBasic_lit.frag | 4 +- .../PipelineRays_TestLaunchKernel.pipe | 188 ++++++++++ .../ray_tracing/Shader_0xE4BF4BB5EC6FAB41.spv | Bin 0 -> 218568 bytes .../PipelineGsTess_AllStagesReloc.pipe | 189 ---------- .../PipelineGs_VertAttributeShort.pipe | 80 ---- .../PipelineVsFs_16BitInput.pipe | 63 ---- .../PipelineVsFs_VertShortInput.pipe | 68 ---- .../PipelineVsFs_VertexFetchDouble.pipe | 72 ---- llpc/tool/amdllpc.cpp | 6 + llpc/tool/llpcAutoLayout.cpp | 9 +- llpc/tool/llpcCompilationUtils.cpp | 19 +- llpc/translator/include/LLVMSPIRVLib.h | 3 +- llpc/translator/lib/SPIRV/SPIRVInternal.h | 8 + llpc/translator/lib/SPIRV/SPIRVReader.cpp | 188 +++++++++- llpc/translator/lib/SPIRV/SPIRVReader.h | 8 + .../lib/SPIRV/libSPIRV/SPIRVEntry.cpp | 7 + .../lib/SPIRV/libSPIRV/SPIRVEntry.h | 4 +- .../translator/lib/SPIRV/libSPIRV/SPIRVEnum.h | 3 + .../lib/SPIRV/libSPIRV/SPIRVInstruction.h | 36 ++ .../lib/SPIRV/libSPIRV/SPIRVIsValidEnum.h | 15 + .../lib/SPIRV/libSPIRV/SPIRVModule.cpp | 48 +++ .../lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h | 8 + .../lib/SPIRV/libSPIRV/SPIRVOpCodeEnum.h | 5 + .../lib/SPIRV/libSPIRV/SPIRVType.cpp | 12 +- .../translator/lib/SPIRV/libSPIRV/SPIRVType.h | 1 + llpc/util/llpcShaderModuleHelper.cpp | 20 +- shared/continuations/CMakeLists.txt | 1 + .../include/continuations/GpurtContext.h | 60 +++ .../continuations/include/lgc/GpurtDialect.td | 78 ++-- .../continuations/include/lgc/LgcRtDialect.td | 12 +- shared/continuations/lib/GpurtContext.cpp | 40 ++ .../lib/LowerRaytracingPipeline.cpp | 68 +++- ...er-rt-pipeline-small-payload-field.ll.hlsl | 15 +- .../test/dx/payload-caller-in-paq.ll.hlsl | 36 +- .../test/dx/payload-save-registers.ll.hlsl | 62 ++-- tool/vfx/CMakeLists.txt | 9 +- tool/vfx/vfxSection.cpp | 4 +- util/extensions.txt | 6 + util/vkgcCapability.h | 3 + util/vkgcExtension.cpp | 4 + util/vkgcExtension.h | 4 + 210 files changed, 2453 insertions(+), 3367 deletions(-) delete mode 100644 lgc/elfLinker/FetchShader.cpp delete mode 100644 lgc/elfLinker/FetchShader.h create mode 100644 lgc/include/lgc/patch/LowerSubgroupOps.h create mode 100644 lgc/patch/LowerSubgroupOps.cpp delete mode 100644 lgc/test/FetchShaderSingleInput.lgc delete mode 100644 lgc/test/IntVectorVertexInput.lgc delete mode 100644 lgc/test/UnlinkedTessFetches.lgc delete mode 100644 lgc/test/UnlinkedVsGsInputs.lgc create mode 100644 llpc/lower/PrepareContinuations.cpp create mode 100644 llpc/lower/PrepareContinuations.h delete mode 100644 llpc/test/shaderdb/extensions/PipelineVsFs_TestFetchSingleInput.pipe create mode 100644 llpc/test/shaderdb/ray_tracing/PipelineRays_TestLaunchKernel.pipe create mode 100644 llpc/test/shaderdb/ray_tracing/Shader_0xE4BF4BB5EC6FAB41.spv delete mode 100644 llpc/test/shaderdb/relocatable_shaders/PipelineGsTess_AllStagesReloc.pipe delete mode 100644 llpc/test/shaderdb/relocatable_shaders/PipelineGs_VertAttributeShort.pipe delete mode 100644 llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_16BitInput.pipe 
delete mode 100644 llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_VertShortInput.pipe delete mode 100644 llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_VertexFetchDouble.pipe create mode 100644 shared/continuations/include/continuations/GpurtContext.h create mode 100644 shared/continuations/lib/GpurtContext.cpp diff --git a/compilerutils/lib/CompilerUtils.cpp b/compilerutils/lib/CompilerUtils.cpp index 81f8ae571d..a2b50ab0f3 100644 --- a/compilerutils/lib/CompilerUtils.cpp +++ b/compilerutils/lib/CompilerUtils.cpp @@ -299,14 +299,32 @@ CompilerUtils::CrossModuleInliner::inlineCall(IRBuilder<> &b, llvm::Function *ca if (!callee->getReturnType()->isVoidTy()) fakeUse = cast(b.CreateFreeze(call)); + // If the builder is at the end of the basic block then we don't have complete IR yet. We need some placeholder to + // know where to reset the insert point to. + Instruction *insertPointMarker = nullptr; + if (b.GetInsertPoint() == b.GetInsertBlock()->end()) { + assert(!b.GetInsertBlock()->getTerminator()); + if (fakeUse) + insertPointMarker = fakeUse; + else + insertPointMarker = b.CreateUnreachable(); + } + auto newBBs = inlineCall(*call); + if (insertPointMarker) { + b.SetInsertPoint(insertPointMarker->getParent()); + if (insertPointMarker != fakeUse) + insertPointMarker->eraseFromParent(); + } else { + b.SetInsertPoint(&*b.GetInsertPoint()); + } + Value *result = nullptr; if (fakeUse) { result = fakeUse->getOperand(0); fakeUse->eraseFromParent(); } - b.SetInsertPoint(&*b.GetInsertPoint()); return {result, newBBs}; } diff --git a/include/khronos/spirv/spirv.hpp b/include/khronos/spirv/spirv.hpp index 1531e8bd33..b9c8743371 100644 --- a/include/khronos/spirv/spirv.hpp +++ b/include/khronos/spirv/spirv.hpp @@ -1,4 +1,4 @@ -// Copyright (c) 2014-2020 The Khronos Group Inc. +// Copyright (c) 2014-2024 The Khronos Group Inc. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and/or associated documentation files (the "Materials"), @@ -73,6 +73,7 @@ enum SourceLanguage { SourceLanguageNZSL = 9, SourceLanguageWGSL = 10, SourceLanguageSlang = 11, + SourceLanguageZig = 12, SourceLanguageMax = 0x7fffffff, }; @@ -183,6 +184,8 @@ enum ExecutionMode { ExecutionModeStencilRefUnchangedBackAMD = 5082, ExecutionModeStencilRefGreaterBackAMD = 5083, ExecutionModeStencilRefLessBackAMD = 5084, + ExecutionModeQuadDerivativesKHR = 5088, + ExecutionModeRequireFullQuadsKHR = 5089, ExecutionModeOutputLinesEXT = 5269, ExecutionModeOutputLinesNV = 5269, ExecutionModeOutputPrimitivesEXT = 5270, @@ -207,6 +210,8 @@ enum ExecutionMode { ExecutionModeNoGlobalOffsetINTEL = 5895, ExecutionModeNumSIMDWorkitemsINTEL = 5896, ExecutionModeSchedulerTargetFmaxMhzINTEL = 5903, + ExecutionModeMaximallyReconvergesKHR = 6023, + ExecutionModeFPFastMathDefault = 6028, ExecutionModeStreamingInterfaceINTEL = 6154, ExecutionModeRegisterMapInterfaceINTEL = 6160, ExecutionModeNamedBarrierCountINTEL = 6417, @@ -426,8 +431,11 @@ enum FPFastMathModeShift { FPFastMathModeNSZShift = 2, FPFastMathModeAllowRecipShift = 3, FPFastMathModeFastShift = 4, + FPFastMathModeAllowContractShift = 16, FPFastMathModeAllowContractFastINTELShift = 16, + FPFastMathModeAllowReassocShift = 17, FPFastMathModeAllowReassocINTELShift = 17, + FPFastMathModeAllowTransformShift = 18, FPFastMathModeMax = 0x7fffffff, }; @@ -438,8 +446,11 @@ enum FPFastMathModeMask { FPFastMathModeNSZMask = 0x00000004, FPFastMathModeAllowRecipMask = 0x00000008, FPFastMathModeFastMask = 0x00000010, + FPFastMathModeAllowContractMask = 0x00010000, FPFastMathModeAllowContractFastINTELMask = 0x00010000, + FPFastMathModeAllowReassocMask = 0x00020000, FPFastMathModeAllowReassocINTELMask = 0x00020000, + FPFastMathModeAllowTransformMask = 0x00040000, }; enum FPRoundingMode { @@ -583,6 +594,9 @@ enum Decoration { DecorationMergeINTEL = 5834, DecorationBankBitsINTEL = 5835, DecorationForcePow2DepthINTEL = 5836, + DecorationStridesizeINTEL = 5883, + DecorationWordsizeINTEL = 5884, + DecorationTrueDualPortINTEL = 5885, DecorationBurstCoalesceINTEL = 5899, DecorationCacheSizeINTEL = 5900, DecorationDontStaticallyCoalesceINTEL = 5901, @@ -601,9 +615,7 @@ enum Decoration { DecorationSingleElementVectorINTEL = 6085, DecorationVectorComputeCallableFunctionINTEL = 6087, DecorationMediaBlockIOINTEL = 6140, - DecorationInitModeINTEL = 6147, - DecorationImplementInRegisterMapINTEL = 6148, - DecorationHostAccessINTEL = 6168, + DecorationStallFreeINTEL = 6151, DecorationFPMaxErrorDecorationINTEL = 6170, DecorationLatencyControlLabelINTEL = 6172, DecorationLatencyControlConstraintINTEL = 6173, @@ -616,6 +628,9 @@ enum Decoration { DecorationMMHostInterfaceMaxBurstINTEL = 6181, DecorationMMHostInterfaceWaitRequestINTEL = 6182, DecorationStableKernelArgumentINTEL = 6183, + DecorationHostAccessINTEL = 6188, + DecorationInitModeINTEL = 6190, + DecorationImplementInRegisterMapINTEL = 6191, DecorationCacheControlLoadINTEL = 6442, DecorationCacheControlStoreINTEL = 6443, DecorationMax = 0x7fffffff, @@ -1068,6 +1083,7 @@ enum Capability { CapabilityInt64ImageEXT = 5016, CapabilityShaderClockKHR = 5055, CapabilityShaderEnqueueAMDX = 5067, + CapabilityQuadControlKHR = 5087, CapabilitySampleMaskOverrideCoverageNV = 5249, CapabilityGeometryShaderPassthroughNV = 5251, CapabilityShaderViewportIndexLayerEXT = 5254, @@ -1187,21 +1203,24 @@ enum Capability { CapabilityCooperativeMatrixKHR = 
6022, CapabilityBitInstructions = 6025, CapabilityGroupNonUniformRotateKHR = 6026, + CapabilityFloatControls2 = 6029, CapabilityAtomicFloat32AddEXT = 6033, CapabilityAtomicFloat64AddEXT = 6034, - CapabilityLongConstantCompositeINTEL = 6089, + CapabilityLongCompositesINTEL = 6089, CapabilityOptNoneINTEL = 6094, CapabilityAtomicFloat16AddEXT = 6095, CapabilityDebugInfoModuleINTEL = 6114, CapabilityBFloat16ConversionINTEL = 6115, CapabilitySplitBarrierINTEL = 6141, - CapabilityGlobalVariableFPGADecorationsINTEL = 6146, + CapabilityFPGAClusterAttributesV2INTEL = 6150, CapabilityFPGAKernelAttributesv2INTEL = 6161, - CapabilityGlobalVariableHostAccessINTEL = 6167, CapabilityFPMaxErrorINTEL = 6169, CapabilityFPGALatencyControlINTEL = 6171, CapabilityFPGAArgumentInterfacesINTEL = 6174, + CapabilityGlobalVariableHostAccessINTEL = 6187, + CapabilityGlobalVariableFPGADecorationsINTEL = 6189, CapabilityGroupUniformArithmeticKHR = 6400, + CapabilityMaskedGatherScatterINTEL = 6427, CapabilityCacheControlsINTEL = 6441, CapabilityMax = 0x7fffffff, }; @@ -1774,6 +1793,8 @@ enum Op { OpFinalizeNodePayloadsAMDX = 5075, OpFinishWritingNodePayloadAMDX = 5078, OpInitializeNodePayloadsAMDX = 5090, + OpGroupNonUniformQuadAllKHR = 5110, + OpGroupNonUniformQuadAnyKHR = 5111, OpHitObjectRecordHitMotionNV = 5249, OpHitObjectRecordHitWithIndexMotionNV = 5250, OpHitObjectRecordMissMotionNV = 5251, @@ -2083,6 +2104,7 @@ enum Op { OpTypeStructContinuedINTEL = 6090, OpConstantCompositeContinuedINTEL = 6091, OpSpecConstantCompositeContinuedINTEL = 6092, + OpCompositeConstructContinuedINTEL = 6096, OpConvertFToBF16INTEL = 6116, OpConvertBF16ToFINTEL = 6117, OpControlBarrierArriveINTEL = 6142, @@ -2095,6 +2117,8 @@ enum Op { OpGroupLogicalAndKHR = 6406, OpGroupLogicalOrKHR = 6407, OpGroupLogicalXorKHR = 6408, + OpMaskedGatherINTEL = 6428, + OpMaskedScatterINTEL = 6429, OpMax = 0x7fffffff, }; @@ -2502,6 +2526,8 @@ inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) { case OpFinalizeNodePayloadsAMDX: *hasResult = false; *hasResultType = false; break; case OpFinishWritingNodePayloadAMDX: *hasResult = true; *hasResultType = true; break; case OpInitializeNodePayloadsAMDX: *hasResult = false; *hasResultType = false; break; + case OpGroupNonUniformQuadAllKHR: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformQuadAnyKHR: *hasResult = true; *hasResultType = true; break; case OpHitObjectRecordHitMotionNV: *hasResult = false; *hasResultType = false; break; case OpHitObjectRecordHitWithIndexMotionNV: *hasResult = false; *hasResultType = false; break; case OpHitObjectRecordMissMotionNV: *hasResult = false; *hasResultType = false; break; @@ -2806,6 +2832,7 @@ inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) { case OpTypeStructContinuedINTEL: *hasResult = false; *hasResultType = false; break; case OpConstantCompositeContinuedINTEL: *hasResult = false; *hasResultType = false; break; case OpSpecConstantCompositeContinuedINTEL: *hasResult = false; *hasResultType = false; break; + case OpCompositeConstructContinuedINTEL: *hasResult = true; *hasResultType = true; break; case OpConvertFToBF16INTEL: *hasResult = true; *hasResultType = true; break; case OpConvertBF16ToFINTEL: *hasResult = true; *hasResultType = true; break; case OpControlBarrierArriveINTEL: *hasResult = false; *hasResultType = false; break; @@ -2818,6 +2845,8 @@ inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) { case OpGroupLogicalAndKHR: *hasResult = true; *hasResultType = 
true; break; case OpGroupLogicalOrKHR: *hasResult = true; *hasResultType = true; break; case OpGroupLogicalXorKHR: *hasResult = true; *hasResultType = true; break; + case OpMaskedGatherINTEL: *hasResult = true; *hasResultType = true; break; + case OpMaskedScatterINTEL: *hasResult = false; *hasResultType = false; break; } } #endif /* SPV_ENABLE_UTILITY_CODE */ diff --git a/include/vkgcDefs.h b/include/vkgcDefs.h index 3ffb6a2080..3c5d92f5ed 100644 --- a/include/vkgcDefs.h +++ b/include/vkgcDefs.h @@ -1175,6 +1175,8 @@ struct GraphicsPipelineBuildInfo { #if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 68 ICache *cache; ///< ICache, used to search for the compiled shader data #endif + uint64_t pipelineApiHash; ///< Pipeline hash. If non-zero, this will be used directly as stable hash + PipelineShaderInfo task; ///< Task shader PipelineShaderInfo vs; ///< Vertex shader PipelineShaderInfo tcs; ///< Tessellation control shader @@ -1267,6 +1269,7 @@ struct ComputePipelineBuildInfo { #if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 68 ICache *cache; ///< ICache, used to search for the compiled shader data #endif + uint64_t pipelineApiHash; ///< Pipeline hash. If non-zero, this will be used directly as stable hash unsigned deviceIndex; ///< Device index for device group PipelineShaderInfo cs; ///< Compute shader ResourceMappingData resourceMapping; ///< Resource mapping graph and static descriptor values @@ -1290,13 +1293,14 @@ struct RayTracingPipelineBuildInfo { #if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 68 ICache *cache; ///< ICache, used to search for the compiled shader data #endif - unsigned deviceIndex; ///< Device index for device group - unsigned deviceCount; ///< Device count for device group - unsigned shaderCount; ///< Count of shader info - PipelineShaderInfo *pShaders; ///< An array of shader info - ResourceMappingData resourceMapping; ///< Resource mapping graph and static descriptor values - uint64_t pipelineLayoutApiHash; ///< Pipeline Layout Api Hash - unsigned shaderGroupCount; ///< Count of shader group + uint64_t pipelineApiHash; ///< Pipeline hash. 
If non-zero, this will be used directly as stable hash + unsigned deviceIndex; ///< Device index for device group + unsigned deviceCount; ///< Device count for device group + unsigned shaderCount; ///< Count of shader info + PipelineShaderInfo *pShaders; ///< An array of shader info + ResourceMappingData resourceMapping; ///< Resource mapping graph and static descriptor values + uint64_t pipelineLayoutApiHash; ///< Pipeline Layout Api Hash + unsigned shaderGroupCount; ///< Count of shader group const VkRayTracingShaderGroupCreateInfoKHR *pShaderGroups; ///< An array of shader group LibraryMode libraryMode; ///< Whether to compile as pipeline or library or both unsigned libraryCount; ///< Count of libraries linked into this build diff --git a/lgc/CMakeLists.txt b/lgc/CMakeLists.txt index 088e1a342a..1535bf5b0e 100644 --- a/lgc/CMakeLists.txt +++ b/lgc/CMakeLists.txt @@ -128,7 +128,6 @@ target_sources(LLVMlgc PRIVATE target_sources(LLVMlgc PRIVATE elfLinker/ColorExportShader.cpp elfLinker/ElfLinker.cpp - elfLinker/FetchShader.cpp elfLinker/GlueShader.cpp elfLinker/NullFragmentShader.cpp ) @@ -142,6 +141,7 @@ target_sources(LLVMlgc PRIVATE patch/Gfx9ConfigBuilder.cpp patch/LowerDebugPrintf.cpp patch/LowerDesc.cpp + patch/LowerSubgroupOps.cpp patch/MeshTaskShader.cpp patch/NggPrimShader.cpp patch/Patch.cpp diff --git a/lgc/builder/BuilderRecorder.cpp b/lgc/builder/BuilderRecorder.cpp index b039121209..758612329c 100644 --- a/lgc/builder/BuilderRecorder.cpp +++ b/lgc/builder/BuilderRecorder.cpp @@ -260,12 +260,8 @@ StringRef BuilderRecorder::getCallName(BuilderOpcode opcode) { return "get.wave.size"; case BuilderOpcode::GetSubgroupSize: return "get.subgroup.size"; - case BuilderOpcode::SubgroupElect: - return "subgroup.elect"; case BuilderOpcode::SubgroupAll: return "subgroup.all"; - case BuilderOpcode::SubgroupAny: - return "subgroup.any"; case BuilderOpcode::SubgroupAllEqual: return "subgroup.all.equal"; case BuilderOpcode::SubgroupRotate: @@ -1620,14 +1616,6 @@ Value *Builder::CreateGetSubgroupSize(const Twine &instName) { return record(BuilderOpcode::GetSubgroupSize, getInt32Ty(), {}, instName); } -// ===================================================================================================================== -// Create a subgroup elect. -// -// @param instName : Name to give instruction(s) -Value *Builder::CreateSubgroupElect(const Twine &instName) { - return record(BuilderOpcode::SubgroupElect, getInt1Ty(), {}, instName); -} - // ===================================================================================================================== // Create a subgroup all. // @@ -1637,15 +1625,6 @@ Value *Builder::CreateSubgroupAll(Value *const value, const Twine &instName) { return record(BuilderOpcode::SubgroupAll, getInt1Ty(), {value}, instName); } -// ===================================================================================================================== -// Create a subgroup any -// -// @param value : The value to compare -// @param instName : Name to give instruction(s) -Value *Builder::CreateSubgroupAny(Value *const value, const Twine &instName) { - return record(BuilderOpcode::SubgroupAny, getInt1Ty(), {value}, instName); -} - // ===================================================================================================================== // Create a subgroup all equal. 
// @@ -2126,7 +2105,6 @@ Instruction *Builder::record(BuilderOpcode opcode, Type *resultTy, ArrayRef(module).getPipelineState(); - runImpl(module, pipelineState); - return PreservedAnalyses::none(); -} -// ===================================================================================================================== -// Run the BuilderReplayer pass on a module -// -// @param module : Module to run this pass on -// @returns : True if the module was modified by the transformation and false otherwise -bool BuilderReplayer::runImpl(Module &module, PipelineState *pipelineState) { LLVM_DEBUG(dbgs() << "Running the pass of replaying LLPC builder calls\n"); // Set up the pipeline state from the specified linked IR module. @@ -121,7 +112,7 @@ bool BuilderReplayer::runImpl(Module &module, PipelineState *pipelineState) { func->eraseFromParent(); m_builder = nullptr; - return true; + return PreservedAnalyses::none(); } // ===================================================================================================================== @@ -728,15 +719,9 @@ Value *BuilderReplayer::processCall(unsigned opcode, CallInst *call) { case BuilderOpcode::GetSubgroupSize: { return m_builder->CreateGetSubgroupSize(); } - case BuilderOpcode::SubgroupElect: { - return m_builder->CreateSubgroupElect(); - } case BuilderOpcode::SubgroupAll: { return m_builder->CreateSubgroupAll(args[0]); } - case BuilderOpcode::SubgroupAny: { - return m_builder->CreateSubgroupAny(args[0]); - } case BuilderOpcode::SubgroupAllEqual: { return m_builder->CreateSubgroupAllEqual(args[0]); } diff --git a/lgc/builder/DescBuilder.cpp b/lgc/builder/DescBuilder.cpp index 5c9609b7b1..ed3d7a35c0 100644 --- a/lgc/builder/DescBuilder.cpp +++ b/lgc/builder/DescBuilder.cpp @@ -80,6 +80,10 @@ Value *BuilderImpl::CreateBufferDesc(uint64_t descSet, unsigned binding, Value * const ResourceNode *topNode = nullptr; const ResourceNode *node = nullptr; std::tie(topNode, node) = m_pipelineState->findResourceNode(abstractType, descSet, binding, m_shaderStage); + if (!node && (flags & BufferFlagAddress)) { + std::tie(topNode, node) = + m_pipelineState->findResourceNode(ResourceNodeType::InlineBuffer, descSet, binding, m_shaderStage); + } if (!node) { // If we can't find the node, assume mutable descriptor and search for any node. std::tie(topNode, node) = @@ -111,6 +115,8 @@ Value *BuilderImpl::CreateBufferDesc(uint64_t descSet, unsigned binding, Value * } else if (node->concreteType == ResourceNodeType::InlineBuffer) { // Handle an inline buffer specially. Get a pointer to it, then expand to a descriptor. 
Value *descPtr = getDescPtr(node->concreteType, topNode, node, binding); + if (return64Address) + return descPtr; desc = buildInlineBufferDesc(descPtr); } else { ResourceNodeType resType = node->concreteType; @@ -404,11 +410,7 @@ Value *BuilderImpl::buildBufferCompactDesc(Value *desc) { sqBufRsrcWord3.bits.dstSelY = BUF_DST_SEL_Y; sqBufRsrcWord3.bits.dstSelZ = BUF_DST_SEL_Z; sqBufRsrcWord3.bits.dstSelW = BUF_DST_SEL_W; - if (gfxIp.major < 10) { - sqBufRsrcWord3.gfx9.numFormat = BUF_NUM_FORMAT_UINT; - sqBufRsrcWord3.gfx9.dataFormat = BUF_DATA_FORMAT_32; - assert(sqBufRsrcWord3.u32All == 0x24FAC); - } else if (gfxIp.major == 10) { + if (gfxIp.major == 10) { sqBufRsrcWord3.gfx10.format = BUF_FORMAT_32_UINT; sqBufRsrcWord3.gfx10.resourceLevel = 1; sqBufRsrcWord3.gfx10.oobSelect = 2; diff --git a/lgc/builder/ImageBuilder.cpp b/lgc/builder/ImageBuilder.cpp index a44d8f31f2..ee2583e446 100644 --- a/lgc/builder/ImageBuilder.cpp +++ b/lgc/builder/ImageBuilder.cpp @@ -1294,7 +1294,7 @@ Value *BuilderImpl::CreateImageQuerySize(unsigned dim, unsigned flags, Value *im } // Proper image. - unsigned modifiedDim = dim == DimCubeArray ? DimCube : change1DTo2DIfNeeded(dim); + unsigned modifiedDim = dim == DimCubeArray ? DimCube : dim; Value *resInfo = nullptr; GfxIpVersion gfxIp = getPipelineState()->getTargetInfo().getGfxIpVersion(); @@ -1465,24 +1465,6 @@ Value *BuilderImpl::CreateImageBvhIntersectRay(Value *nodePtr, Value *extent, Va return CreateIntrinsic(FixedVectorType::get(getInt32Ty(), 4), Intrinsic::amdgcn_image_bvh_intersect_ray, args); } -// ===================================================================================================================== -// Change 1D or 1DArray dimension to 2D or 2DArray if needed as a workaround on GFX9+ -// -// @param dim : Image dimension -unsigned BuilderImpl::change1DTo2DIfNeeded(unsigned dim) { - if (getPipelineState()->getTargetInfo().getGpuWorkarounds().gfx9.treat1dImagesAs2d) { - switch (dim) { - case Dim1D: - return Dim2D; - case Dim1DArray: - return Dim2DArray; - default: - break; - } - } - return dim; -} - // ===================================================================================================================== // Prepare coordinate and explicit derivatives, pushing the separate components into the supplied vectors, and // modifying if necessary. @@ -1519,25 +1501,6 @@ unsigned BuilderImpl::prepareCoordinate(unsigned dim, Value *coord, Value *proje outCoords[i] = CreateFMul(outCoords[i], projective); } - // For 1D or 1DArray on GFX9+, change to 2D or 2DArray and add the extra component. The - // extra component is 0 for int or 0.5 for FP. - unsigned origDim = dim; - bool needExtraDerivativeDim = false; - dim = change1DTo2DIfNeeded(dim); - if (dim != origDim) { - Value *extraComponent = getInt32(0); - needExtraDerivativeDim = true; - if (!coordScalarTy->isIntegerTy()) - extraComponent = ConstantFP::get(coordScalarTy, 0.5); - - if (dim == Dim2D) - outCoords.push_back(extraComponent); - else { - outCoords.push_back(outCoords.back()); - outCoords[1] = extraComponent; - } - } - if (coordScalarTy->isIntegerTy()) { // Integer components (image load/store/atomic). assert(!derivativeX && !derivativeY); @@ -1602,22 +1565,12 @@ unsigned BuilderImpl::prepareCoordinate(unsigned dim, Value *coord, Value *proje } else outDerivatives.push_back(derivativeX); - if (needExtraDerivativeDim) { - // GFX9+ 1D -> 2D: need extra derivative too. 
- outDerivatives.push_back(Constant::getNullValue(outDerivatives[0]->getType())); - } - // Derivatives by Y if (auto vectorDerivativeYTy = dyn_cast(derivativeY->getType())) { for (unsigned i = 0; i != vectorDerivativeYTy->getNumElements(); ++i) outDerivatives.push_back(CreateExtractElement(derivativeY, i)); } else outDerivatives.push_back(derivativeY); - - if (needExtraDerivativeDim) { - // GFX9+ 1D -> 2D: need extra derivative too. - outDerivatives.push_back(Constant::getNullValue(outDerivatives[0]->getType())); - } } if (outDerivatives.empty() || dim != DimCube) return dim; diff --git a/lgc/builder/InOutBuilder.cpp b/lgc/builder/InOutBuilder.cpp index e5179d8aeb..f73fbf2bf2 100644 --- a/lgc/builder/InOutBuilder.cpp +++ b/lgc/builder/InOutBuilder.cpp @@ -93,14 +93,13 @@ Value *BuilderImpl::CreateReadPerVertexInput(Type *resultTy, unsigned location, assert(m_shaderStage == ShaderStage::Fragment); // Fold constant locationOffset into location. - if (auto constLocOffset = dyn_cast(locationOffset)) { - location += constLocOffset->getZExtValue(); - locationOffset = getInt32(0); - locationCount = divideCeil(resultTy->getPrimitiveSizeInBits(), 128); - } + assert(isa(locationOffset)); + location += cast(locationOffset)->getZExtValue(); + locationOffset = getInt32(0); + locationCount = divideCeil(resultTy->getPrimitiveSizeInBits(), 128); // Mark the usage of the input/output. - markGenericInputOutputUsage(false, location, locationCount, inputInfo, vertexIndex); + markGenericInputOutputUsage(false, location, locationCount, inputInfo, vertexIndex != nullptr); // Lambda to do the actual input read. auto readInput = [&](Value *vertexIndex) { @@ -182,16 +181,17 @@ Value *BuilderImpl::readGenericInputOutput(bool isOutput, Type *resultTy, unsign // Fold constant locationOffset into location. (Currently a variable locationOffset is only supported in // TCS, TES, mesh shader, and FS custom interpolation.) - bool isDynLocOffset = true; + bool directlyMapLocations = true; if (auto constLocOffset = dyn_cast(locationOffset)) { location += constLocOffset->getZExtValue(); locationOffset = getInt32(0); locationCount = (resultTy->getPrimitiveSizeInBits() + 127U) / 128U; - isDynLocOffset = false; + directlyMapLocations = false; // Reset this flag if dynamic location indexing is avoided } // Mark the usage of the input/output. - markGenericInputOutputUsage(isOutput, location, locationCount, inOutInfo, vertexIndex, isDynLocOffset); + markGenericInputOutputUsage(isOutput, location, locationCount, inOutInfo, vertexIndex != nullptr, + directlyMapLocations); // Generate LLPC call for reading the input/output. Value *result = nullptr; @@ -269,17 +269,17 @@ Instruction *BuilderImpl::CreateWriteGenericOutput(Value *valueToWrite, unsigned // Fold constant locationOffset into location (Currently a variable locationOffset is only supported in // TCS or mesh shader). - bool isDynLocOffset = true; + bool directlyMapLocations = true; if (auto constLocOffset = dyn_cast(locationOffset)) { location += constLocOffset->getZExtValue(); locationOffset = getInt32(0); locationCount = (valueToWrite->getType()->getPrimitiveSizeInBits() + 127U) / 128U; - isDynLocOffset = false; + directlyMapLocations = false; // Reset this flag if dynamic location indexing is avoided } // Mark the usage of the output. 
- markGenericInputOutputUsage(/*isOutput=*/true, location, locationCount, outputInfo, vertexOrPrimitiveIndex, - isDynLocOffset); + markGenericInputOutputUsage(/*isOutput=*/true, location, locationCount, outputInfo, vertexOrPrimitiveIndex != nullptr, + directlyMapLocations); // Set up the args for the llpc call. SmallVector args; @@ -348,13 +348,16 @@ Instruction *BuilderImpl::CreateWriteGenericOutput(Value *valueToWrite, unsigned // @param location : Input/output base location // @param locationCount : Count of locations taken by the input/output // @param inOutInfo : Extra input/output information -// @param vertexOrPrimIndex : For TCS/TES/GS/mesh shader per-vertex input/output: vertex index; -// for mesh shader per-primitive output: primitive index; -// for FS custom-interpolated input: auxiliary value; -// else nullptr. -// @param isDynLocOffset : Whether the location offset is dynamic indexing +// @param hasVertexOrPrimIndex : Whether this input or output takes a vertex or primitive index. For TCS/TES/GS/mesh +// shader, this is the vertex index for per-vertex input/output; for mesh shader, this +// is the primitive index for per-primitive output; for FS custom-interpolated input, +// this is the auxiliary value. +// @param directlyMapLocations : Directly map locations to new ones with a trivial map (keep location/component +// unchanged). This is for dynamic indexing of arrayed input/output, when the locations of +// their elements are dynamically indexed. void BuilderImpl::markGenericInputOutputUsage(bool isOutput, unsigned location, unsigned locationCount, - InOutInfo &inOutInfo, Value *vertexOrPrimIndex, bool isDynLocOffset) { + InOutInfo &inOutInfo, bool hasVertexOrPrimIndex, + bool directlyMapLocations) { auto resUsage = getPipelineState()->getShaderResourceUsage(m_shaderStage.value()); // Mark the input or output locations as in use. 
@@ -366,7 +369,7 @@ void BuilderImpl::markGenericInputOutputUsage(bool isOutput, unsigned location, // Per-primitive input assert(m_shaderStage == ShaderStage::Fragment); // Must be FS perPrimitiveInOutLocMap = &resUsage->inOutUsage.perPrimitiveInputLocMap; - } else if (m_shaderStage != ShaderStage::TessEval || vertexOrPrimIndex) { + } else if (m_shaderStage != ShaderStage::TessEval || hasVertexOrPrimIndex) { // Per-vertex input inOutLocInfoMap = &resUsage->inOutUsage.inputLocInfoMap; } else { @@ -379,7 +382,7 @@ void BuilderImpl::markGenericInputOutputUsage(bool isOutput, unsigned location, // Per-primitive output assert(m_shaderStage == ShaderStage::Mesh); // Must be mesh shader perPrimitiveInOutLocMap = &resUsage->inOutUsage.perPrimitiveOutputLocMap; - } else if (m_shaderStage != ShaderStage::TessControl || vertexOrPrimIndex) { + } else if (m_shaderStage != ShaderStage::TessControl || hasVertexOrPrimIndex) { // Per-vertex output inOutLocInfoMap = &resUsage->inOutUsage.outputLocInfoMap; } else { @@ -426,8 +429,8 @@ void BuilderImpl::markGenericInputOutputUsage(bool isOutput, unsigned location, origLocationInfo.setLocation(location + i); origLocationInfo.setComponent(inOutInfo.getComponent()); auto &newLocationInfo = (*inOutLocInfoMap)[origLocationInfo]; - if (isDynLocOffset) { - // When dynamic indexing, map the location directly + if (directlyMapLocations) { + // Directly map the locations (trivial map) without further calculation newLocationInfo.setLocation(location + i); newLocationInfo.setComponent(inOutInfo.getComponent()); } else @@ -449,7 +452,8 @@ void BuilderImpl::markGenericInputOutputUsage(bool isOutput, unsigned location, // Add location map entries for this input/output for (unsigned i = 0; i < locationCount; ++i) (*perPatchInOutLocMap)[location + i] = - isDynLocOffset ? location + i : InvalidValue; // When dynamic indexing, map the location + directlyMapLocations ? location + i + : InvalidValue; // Directly map the locations (trivial map) without further calculation } if (perPrimitiveInOutLocMap) { @@ -466,7 +470,8 @@ void BuilderImpl::markGenericInputOutputUsage(bool isOutput, unsigned location, // Add location map entries for this input/output for (unsigned i = 0; i < locationCount; ++i) (*perPrimitiveInOutLocMap)[location + i] = - isDynLocOffset ? location + i : InvalidValue; // When dynamic indexing, map the location + directlyMapLocations ? 
location + i + : InvalidValue; // Directly map the locations (trivial map) without further calculation } } else { // GS output @@ -848,7 +853,7 @@ Value *BuilderImpl::CreateReadBaryCoord(BuiltInKind builtIn, InOutInfo inputInfo assert(interpMode == InOutInfo::InterpModeSmooth); (void)interpMode; - return normalizeBaryCoord(interpValue); + return normalizeBaryCoord(inputInfo, interpValue); } // ===================================================================================================================== @@ -1026,7 +1031,7 @@ void BuilderImpl::getProvokingVertexInfo(llvm::Value **isOne, llvm::Value **isTw // // @param iJCoord : IJ coordinates provided for the HW interpolation view // @returns : gl_Barycoord -Value *BuilderImpl::normalizeBaryCoord(Value *iJCoord) { +Value *BuilderImpl::normalizeBaryCoord(InOutInfo inputInfo, Value *iJCoord) { auto baryType = FixedVectorType::get(getFloatTy(), 3); auto zero = ConstantFP::get(getFloatTy(), 0.0); auto one = ConstantFP::get(getFloatTy(), 1.0); @@ -1061,6 +1066,11 @@ Value *BuilderImpl::normalizeBaryCoord(Value *iJCoord) { barycoord1 = CreateInsertElement(barycoord1, hwCoord[1], 1); barycoord1 = CreateInsertElement(barycoord1, hwCoord[2], 2); + if (inputInfo.isProvokingVertexModeDisabled()) { + // return the original i,j,k w/o any adjustment + return barycoord1; + } + Value *barycoord0 = CreateShuffleVector(barycoord1, ArrayRef({2, 0, 1})); Value *barycoord2 = CreateShuffleVector(barycoord1, ArrayRef({1, 2, 0})); return CreateSelect(isOne, barycoord1, CreateSelect(isTwo, barycoord2, barycoord0)); diff --git a/lgc/builder/SubgroupBuilder.cpp b/lgc/builder/SubgroupBuilder.cpp index 92f755e72a..28e3943a61 100644 --- a/lgc/builder/SubgroupBuilder.cpp +++ b/lgc/builder/SubgroupBuilder.cpp @@ -74,14 +74,6 @@ unsigned BuilderImpl::getShaderWaveSize() { return getPipelineState()->getShaderWaveSize(shaderStage.value()); } -// ===================================================================================================================== -// Create a subgroup elect call. -// -// @param instName : Name to give final instruction. -Value *BuilderImpl::CreateSubgroupElect(const Twine &instName) { - return CreateICmpEQ(CreateSubgroupMbcnt(createGroupBallot(getTrue()), ""), getInt32(0)); -} - // ===================================================================================================================== // Create a subgroup all call. // @@ -101,25 +93,6 @@ Value *BuilderImpl::CreateSubgroupAll(Value *const value, const Twine &instName) return result; } -// ===================================================================================================================== -// Create a subgroup any call. -// -// @param value : The value to compare across the subgroup. Must be an integer type. -// @param instName : Name to give final instruction. 
-Value *BuilderImpl::CreateSubgroupAny(Value *const value, const Twine &instName) { - Value *result = CreateICmpNE(createGroupBallot(value), getInt64(0)); - result = CreateSelect(CreateUnaryIntrinsic(Intrinsic::is_constant, value), value, result); - - // Helper invocations of whole quad mode should be included in the subgroup vote execution - const auto &fragmentMode = m_pipelineState->getShaderModes()->getFragmentShaderMode(); - if (m_shaderStage == ShaderStage::Fragment && !fragmentMode.waveOpsExcludeHelperLanes) { - result = CreateZExt(result, getInt32Ty()); - result = CreateIntrinsic(Intrinsic::amdgcn_softwqm, {getInt32Ty()}, {result}); - result = CreateTrunc(result, getInt1Ty()); - } - return result; -} - // ===================================================================================================================== // Create a subgroup all equal call. // diff --git a/lgc/builder/YCbCrAddressHandler.cpp b/lgc/builder/YCbCrAddressHandler.cpp index 807f528e3c..f8636534cc 100644 --- a/lgc/builder/YCbCrAddressHandler.cpp +++ b/lgc/builder/YCbCrAddressHandler.cpp @@ -57,7 +57,6 @@ void YCbCrAddressHandler::genBaseAddress(unsigned planeCount) { Value *pipeBankXorNone = m_builder->getInt32(0); switch (m_gfxIp->major) { - case 9: case 11: { pipeBankXor1 = pipeBankXorNone; pipeBankXor2 = pipeBankXorNone; @@ -138,28 +137,6 @@ void YCbCrAddressHandler::genHeightAndPitch(unsigned bits, unsigned bpp, unsigne m_swizzleMode = m_regHandler->getReg(SqRsrcRegs::SwizzleMode); switch (m_gfxIp->major) { - case 9: { - // Height = SqRsrcRegs::Height - Value *height = m_regHandler->getReg(SqRsrcRegs::Height); - // HeightHalf = Height * 0.5 - Value *heightHalf = m_builder->CreateLShr(height, m_one); - - m_heightY = height; - m_heightCb = heightHalf; - - // Pitch = SqRsrcRegs::Pitch - Value *pitch = m_regHandler->getReg(SqRsrcRegs::Pitch); - // PitchHalf = Pitch * 0.5 - Value *pitchHalf = m_builder->CreateLShr(pitch, m_one); - - // PitchY * (xBitCount >> 3) - m_pitchY = m_builder->CreateMul(pitch, m_builder->CreateLShr(m_builder->getInt32(xBitCount), 3)); - - // PitchCb = PitchCb * (xBitCount >> 3) - m_pitchCb = m_builder->CreateMul(pitchHalf, m_builder->CreateLShr(m_builder->getInt32(xBitCount), 3)); - - break; - } case 10: case 11: { const unsigned elementBytes = bpp >> 3; diff --git a/lgc/builder/YCbCrConverter.cpp b/lgc/builder/YCbCrConverter.cpp index a9642ae580..7a9de464f1 100644 --- a/lgc/builder/YCbCrConverter.cpp +++ b/lgc/builder/YCbCrConverter.cpp @@ -365,17 +365,6 @@ void YCbCrConverter::genImgDescChroma() { Value *isBgRgFmt = nullptr; switch (m_gfxIp->major) { - case 9: { - isGbGrFmt = m_builder->CreateICmpEQ(imgDataFmt, - m_builder->getInt32(BuilderImpl::ImgDataFormat::IMG_DATA_FORMAT_BG_RG__CORE)); - - isBgRgFmt = m_builder->CreateICmpEQ(imgDataFmt, - m_builder->getInt32(BuilderImpl::ImgDataFormat::IMG_DATA_FORMAT_GB_GR__CORE)); - - proxySqRsrcRegHelper.setReg(SqRsrcRegs::Format, - m_builder->getInt32(BuilderImpl::ImgDataFormat::IMG_DATA_FORMAT_8_8_8_8)); - break; - } case 10: { isGbGrFmt = m_builder->CreateICmpEQ( imgDataFmt, m_builder->getInt32(BuilderImpl::ImgFmtGfx10::IMG_FMT_BG_RG_UNORM__GFX10CORE)); diff --git a/lgc/elfLinker/ColorExportShader.cpp b/lgc/elfLinker/ColorExportShader.cpp index 713fc8cc1a..05069d2ee0 100644 --- a/lgc/elfLinker/ColorExportShader.cpp +++ b/lgc/elfLinker/ColorExportShader.cpp @@ -59,6 +59,7 @@ ColorExportShader::ColorExportShader(PipelineState *pipelineState, ArrayRefgetOptions().enableColorExportShader); + + // ColorExportState + MaxColorTargets * 
(expfmt + writeMask) + sizeEstimate += sizeof(ColorExportState) + MaxColorTargets * (sizeof(unsigned) * 2); + if (m_key.colorExportState.dualSourceBlendDynamicEnable || m_key.colorExportState.dualSourceBlendEnable) { + sizeEstimate += sizeof(m_key.waveSize); + } m_shaderString.reserve(sizeEstimate); for (ColorExportInfo colorExportInfo : m_exports) { @@ -82,13 +91,24 @@ StringRef ColorExportShader::getString() { StringRef(reinterpret_cast(&colorExportInfo.location), sizeof(colorExportInfo.location)); m_shaderString += getTypeName(colorExportInfo.ty); } + unsigned gfxip = m_lgcContext->getTargetInfo().getGfxIpVersion().major; + m_shaderString += StringRef(reinterpret_cast(&gfxip), sizeof(unsigned)); m_shaderString += StringRef(reinterpret_cast(&m_killEnabled), sizeof(m_killEnabled)); + m_shaderString += StringRef(reinterpret_cast(&m_pipelineState->getOptions().enableColorExportShader), + sizeof(m_pipelineState->getOptions().enableColorExportShader)); - const ColorExportState *colorExportState = &m_pipelineState->getColorExportState(); + const ColorExportState *colorExportState = &m_key.colorExportState; m_shaderString += StringRef(reinterpret_cast(colorExportState), sizeof(*colorExportState)); for (unsigned location = 0; location < MaxColorTargets; ++location) { - const ColorExportFormat *colorExportFormat = &m_pipelineState->getColorExportFormat(location); - m_shaderString += StringRef(reinterpret_cast(colorExportFormat), sizeof(*colorExportFormat)); + unsigned expFmt = m_key.expFmt[location]; + unsigned writeMask = m_key.channelWriteMask[location]; + m_shaderString += StringRef(reinterpret_cast(&expFmt), sizeof(expFmt)); + m_shaderString += StringRef(reinterpret_cast(&writeMask), sizeof(writeMask)); + } + + if (m_key.colorExportState.dualSourceBlendDynamicEnable || m_key.colorExportState.dualSourceBlendEnable) { + unsigned waveSize = m_key.waveSize; + m_shaderString += StringRef(reinterpret_cast(&waveSize), sizeof(waveSize)); } } return m_shaderString; @@ -107,7 +127,7 @@ Module *ColorExportShader::generate() { Function *colorExportFunc = createColorExportFunc(); // Process each fragment output. - FragColorExport fragColorExport(&getContext(), m_pipelineState); + FragColorExport fragColorExport(m_lgcContext); auto ret = cast(colorExportFunc->back().getTerminator()); BuilderBase builder(ret); @@ -121,10 +141,9 @@ Module *ColorExportShader::generate() { PalMetadata palMetadata{m_pipelineState, m_pipelineState->useRegisterFieldFormat()}; - bool dummyExport = m_lgcContext->getTargetInfo().getGfxIpVersion().major < 10 || m_killEnabled; Value *dynamicIsDualSource = colorExportFunc->getArg(lastIndex); - fragColorExport.generateExportInstructions(m_exports, values, dummyExport, &palMetadata, builder, - dynamicIsDualSource); + fragColorExport.generateExportInstructions(m_exports, values, m_killEnabled, &palMetadata, builder, + dynamicIsDualSource, m_key); // Handle on the dualSourceBlend case which may have two blocks with two returnInsts SmallVector retInsts; diff --git a/lgc/elfLinker/ColorExportShader.h b/lgc/elfLinker/ColorExportShader.h index 5fc63b9098..836a1d485b 100644 --- a/lgc/elfLinker/ColorExportShader.h +++ b/lgc/elfLinker/ColorExportShader.h @@ -32,6 +32,7 @@ #pragma once #include "GlueShader.h" +#include "lgc/patch/FragColorExport.h" #include "lgc/state/PalMetadata.h" #include "lgc/state/PipelineState.h" @@ -82,6 +83,7 @@ class ColorExportShader : public GlueShader { // The encoded or hashed (in some way) single string version of the above. 
std::string m_shaderString; bool m_killEnabled; // True if this fragment shader has kill enabled. + FragColorExport::Key m_key; }; } // namespace lgc diff --git a/lgc/elfLinker/ElfLinker.cpp b/lgc/elfLinker/ElfLinker.cpp index edcb061c9a..07b8f0b802 100644 --- a/lgc/elfLinker/ElfLinker.cpp +++ b/lgc/elfLinker/ElfLinker.cpp @@ -161,7 +161,7 @@ class ElfLinkerImpl final : public ElfLinker { llvm::ArrayRef getGlueInfo() override final; // Explicitly build color export shader - StringRef buildColorExportShader(ArrayRef exports, bool enableKill) override final; + StringRef createColorExportShader(ArrayRef exports, bool enableKill) override final; // Add a blob for a particular chunk of glue code, typically retrieved from a cache void addGlue(unsigned glueIndex, StringRef blob) override final; @@ -311,15 +311,6 @@ void ElfLinkerImpl::doneInputs() { // Create a GlueShader object for each glue shader needed for this link. This does not actually create // the glue shaders themselves, just the GlueShader objects that represent them. void ElfLinkerImpl::createGlueShaders() { - // Create a fetch shader object if we need one. - SmallVector fetches; - m_pipelineState->getPalMetadata()->getVertexFetchInfo(fetches); - if (!fetches.empty()) { - VsEntryRegInfo vsEntryRegInfo = {}; - m_pipelineState->getPalMetadata()->getVsEntryRegInfo(vsEntryRegInfo); - m_glueShaders.push_back(GlueShader::createFetchShader(m_pipelineState, fetches, vsEntryRegInfo)); - } - if (m_pipelineState->isGraphics() && !this->m_pipelineState->getShaderStageMask().contains(ShaderStage::Fragment)) { m_glueShaders.push_back(GlueShader::createNullFragmentShader(m_pipelineState)); } @@ -357,19 +348,20 @@ ArrayRef ElfLinkerImpl::getGlueInfo() { } // ===================================================================================================================== -// Build color export shader +// Create color export shader // // @param exports : Fragment export info // @param enableKill : Whether this fragment shader has kill enabled. // @param zFmt : depth-export-format -StringRef ElfLinkerImpl::buildColorExportShader(ArrayRef exports, bool enableKill) { +StringRef ElfLinkerImpl::createColorExportShader(ArrayRef exports, bool enableKill) { assert(m_glueShaders.empty()); m_glueShaders.push_back(GlueShader::createColorExportShader(m_pipelineState, exports)); ColorExportShader *copyColorShader = static_cast(m_glueShaders[0].get()); if (enableKill) copyColorShader->enableKill(); + m_doneInputs = true; - return copyColorShader->getElfBlob(); + return copyColorShader->getString(); } // ===================================================================================================================== diff --git a/lgc/elfLinker/FetchShader.cpp b/lgc/elfLinker/FetchShader.cpp deleted file mode 100644 index 6d53861f7d..0000000000 --- a/lgc/elfLinker/FetchShader.cpp +++ /dev/null @@ -1,346 +0,0 @@ -/* - *********************************************************************************************************************** - * - * Copyright (c) 2020-2024 Advanced Micro Devices, Inc. All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - **********************************************************************************************************************/ -/** - *********************************************************************************************************************** - * @file FetchShader.cpp - * @brief LGC source file: The class to generate the fetch shader used when linking a pipeline. - *********************************************************************************************************************** - */ - -#include "FetchShader.h" -#include "lgc/patch/ShaderInputs.h" -#include "lgc/patch/VertexFetch.h" -#include "lgc/util/AddressExtender.h" -#include "lgc/util/BuilderBase.h" -#include "llvm/IR/IntrinsicsAMDGPU.h" -#include "llvm/Target/TargetMachine.h" - -using namespace lgc; -using namespace llvm; - -constexpr uint32_t LsHsSysValueMergedWaveInfo = 3; - -// ===================================================================================================================== -// Constructor. This is where we store all the information needed to generate the fetch shader; other methods -// do not need to look at PipelineState. -// -// @param pipelineState : The pipeline state for which the fetch shader will be generated. -// @param fetches : The vertex fetch information for the vertex shader. -// @param vsEntryRegInfo : The information about the contents of the parameters to the vertex shader. -FetchShader::FetchShader(PipelineState *pipelineState, ArrayRef fetches, - const VsEntryRegInfo &vsEntryRegInfo) - : GlueShader(pipelineState), m_vsEntryRegInfo(vsEntryRegInfo) { - m_fetches.append(fetches.begin(), fetches.end()); - for (const auto &fetch : m_fetches) - m_fetchDescriptions.push_back(pipelineState->findVertexInputDescription(fetch.location)); - m_fixLsVgprInput = pipelineState->getTargetInfo().getGpuWorkarounds().gfx9.fixLsVgprInput; -} - -// ===================================================================================================================== -// Get the string for this fetch shader. This is some encoding or hash of the inputs to the createFetchShader function -// that the front-end client can use as a cache key to avoid compiling the same glue shader more than once. 
-StringRef FetchShader::getString() { - if (m_shaderString.empty()) { - for (VertexFetchInfo fetchInfo : m_fetches) { - m_shaderString += StringRef(reinterpret_cast(&fetchInfo.location), sizeof(fetchInfo.location)); - m_shaderString += StringRef(reinterpret_cast(&fetchInfo.component), sizeof(fetchInfo.component)); - m_shaderString += getTypeName(fetchInfo.ty); - } - m_shaderString += StringRef(reinterpret_cast(&m_vsEntryRegInfo), sizeof(m_vsEntryRegInfo)).str(); - for (const VertexInputDescription *description : m_fetchDescriptions) { - if (!description) - m_shaderString += StringRef("\0", 1); - else - m_shaderString += StringRef(reinterpret_cast(description), sizeof(*description)); - } - m_shaderString += m_fixLsVgprInput; - } - return m_shaderString; -} - -// ===================================================================================================================== -// Get the symbol name of the main shader that this glue shader is prolog or epilog for -StringRef FetchShader::getMainShaderName() { - return getEntryPointName(m_vsEntryRegInfo.callingConv, /*isFetchlessVs=*/true); -} - -// ===================================================================================================================== -// Generate the IR module for the fetch shader -Module *FetchShader::generate() { - // Create the function. - Function *fetchFunc = createFetchFunc(); - generateFetchShaderBody(fetchFunc); - replaceShaderInputBuiltInFunctions(fetchFunc); - return fetchFunc->getParent(); -} - -// ===================================================================================================================== -// Generate the body of the fetch function using the shader input builtins to access the inputs to the shader. -// -// @param [in/out] fetchFunc : The function for the fetch shader. -void FetchShader::generateFetchShaderBody(Function *fetchFunc) { // Process each vertex input. - std::unique_ptr vertexFetch( - VertexFetch::create(m_lgcContext, m_pipelineState->getOptions().useSoftwareVertexBufferDescriptors, - m_pipelineState->getOptions().vbAddressLowBitsKnown)); - auto ret = cast(fetchFunc->back().getTerminator()); - BuilderImpl builder(m_pipelineState); - builder.SetInsertPoint(ret); - Value *result = ret->getOperand(0); - - for (unsigned idx = 0; idx != m_fetches.size(); ++idx) { - const auto &fetch = m_fetches[idx]; - const VertexInputDescription *description = m_fetchDescriptions[idx]; - unsigned structIdx = idx + m_vsEntryRegInfo.sgprCount + m_vsEntryRegInfo.vgprCount; - - if (description) { - // Fetch the vertex. - Value *vertex = vertexFetch->fetchVertex(fetch.ty, description, fetch.location, fetch.component, builder); - Type *ty = cast(result->getType())->getElementType(structIdx); - - if (fetch.ty->getPrimitiveSizeInBits() < ty->getPrimitiveSizeInBits()) { - // If the number of bits do not match, we should zero-extend the value so that we can do the bit cast. We - // assume that the number of bits in ty is no larger than 64. If the scalar size of fetch.ty is 32 or larger, - // the total size is a multiple of 32, and the condition above will be false. If the scalar size of fetch.ty is - // 8 or 16, the vector size cannot be larger than 4, so the maximum size is 16*4, which is 64. 
- assert(ty->getPrimitiveSizeInBits() <= 64); - Type *smallerIntType = Type::getIntNTy(ty->getContext(), fetch.ty->getPrimitiveSizeInBits()); - Type *largerIntType = Type::getIntNTy(ty->getContext(), ty->getPrimitiveSizeInBits()); - vertex = builder.CreateBitCast(vertex, smallerIntType); - vertex = builder.CreateZExt(vertex, largerIntType); - } - - vertex = builder.CreateBitCast(vertex, ty); - result = builder.CreateInsertValue(result, vertex, structIdx); - } - } - ret->setOperand(0, result); -} - -// ===================================================================================================================== -// Replaces calls to the shader input builtins in fetchFunc with code that will get the appropriate values from the -// arguments. -// -// @param [in/out] fetchFunc : The function for the fetch shader. -void FetchShader::replaceShaderInputBuiltInFunctions(Function *fetchFunc) const { - auto ret = cast(fetchFunc->back().getTerminator()); - BuilderBase builder(ret); - // Hook up the inputs (vertex buffer, base vertex, base instance, - // vertex ID, instance ID). The fetchVertex calls - // left its uses of them as lgc.special.user.data and lgc.shader.input calls. - for (Function &func : *fetchFunc->getParent()) { - if (!func.isDeclaration()) - continue; - if (func.getName().starts_with(lgcName::SpecialUserData) || func.getName().starts_with(lgcName::ShaderInput)) { - while (!func.use_empty()) { - auto call = cast(func.use_begin()->getUser()); - Value *replacement = nullptr; - replacement = getReplacementForInputBuiltIn(call); - call->replaceAllUsesWith(replacement); - call->eraseFromParent(); - } - } - } -} - -// ===================================================================================================================== -// Returns the value that is represented by |call|. It will be in a position where in can be used in place of all -// uses of |call|. -// -// @param call : A call to a shader input builtin that needs to be replaced. -// @returns : The value that is represented by |call|. -Value *FetchShader::getReplacementForInputBuiltIn(CallInst *call) const { - switch (cast(call->getArgOperand(0))->getZExtValue()) { - case static_cast(UserDataMapping::VertexBufferTable): - return getReplacementForVertexBufferTableBuiltIn(call); - case static_cast(UserDataMapping::BaseVertex): - return call->getFunction()->getArg(m_vsEntryRegInfo.baseVertex); - case static_cast(UserDataMapping::BaseInstance): - return call->getFunction()->getArg(m_vsEntryRegInfo.baseInstance); - case static_cast(ShaderInput::VertexId): - return getReplacementForVertexIdBuiltIn(call); - case static_cast(ShaderInput::InstanceId): - return getReplacementForInstanceIdBuiltIn(call); - default: - llvm_unreachable("Unexpected special user data or shader input"); - } - return nullptr; -} - -// ===================================================================================================================== -// Returns the value of the instance id. All new code will be place at the start of the function containing call. -// -// @param call : A call to the InstanceId shader input builtin. -// @returns : The value of the InstanceId. -Value *FetchShader::getReplacementForInstanceIdBuiltIn(CallInst *call) const { - Function *callerFunction = call->getFunction(); - return getVgprArgumentAsAnInt32(m_vsEntryRegInfo.instanceId, callerFunction); -} - -// ===================================================================================================================== -// Returns the value of the vertex id. 
All new code will be place at the start of the function containing call. -// -// @param call : A call to the VertexId shader input builtin. -// @returns : The value of the VertexId. -Value *FetchShader::getReplacementForVertexIdBuiltIn(CallInst *call) const { - Function *callerFunction = call->getFunction(); - return getVgprArgumentAsAnInt32(m_vsEntryRegInfo.vertexId, callerFunction); -} - -// ===================================================================================================================== -// Returns the value of the argument in the function that corresponds to the given VGPR cast to a 32-bit integer. -// -// @param vgpr : The VGPR number from which the value should come. -// @param function : The function from which to get the argument. -// @returns : The value of the argument as a 32-bit integer. -Value *FetchShader::getVgprArgumentAsAnInt32(unsigned vgpr, Function *function) const { - BuilderBase builder(&*function->front().getFirstNonPHIOrDbgOrAlloca()); - Value *vertexId = getVpgrArgument(vgpr, builder); - return builder.CreateBitCast(vertexId, builder.getInt32Ty()); -} - -// ===================================================================================================================== -// Returns the value of the argument in the function that corresponds to the given VGPR. The function that is used will -// be the same as the function that contains the insertion point of the builder. All new instructions will be added -// using the builder. -// -// @param vgpr : The VGPR number from which the value should come. -// @param builder : The builder to use if new instructions are needed. -// @returns : The value of the argument in the function. -Value *FetchShader::getVpgrArgument(unsigned vgpr, BuilderBase &builder) const { - Function *function = builder.GetInsertPoint()->getFunction(); - if (!mustFixLsVgprInput()) - return function->getArg(m_vsEntryRegInfo.sgprCount + vgpr); - - // On GFX9, the hardware will shift the LS input vgprs by 2 when the HS is null (ie has vertex count 0). The vertex - // count is not know ahead of time, so it must be checked at runtime. - constexpr unsigned offsetCorrection = 2; - - Type *int32Type = builder.getInt32Ty(); - Value *mergeWaveInfo = function->getArg(LsHsSysValueMergedWaveInfo); - Value *eight = builder.getInt32(8); - std::array args = {mergeWaveInfo, eight, eight}; - Value *hsVertexCount = builder.CreateIntrinsic(Intrinsic::amdgcn_ubfe, int32Type, args, nullptr, "HsVertCount"); - Value *isNullHs = builder.CreateICmp(CmpInst::ICMP_EQ, hsVertexCount, builder.getInt32(0), "IsNullHs"); - - Value *valueForNonNullHs = function->getArg(m_vsEntryRegInfo.sgprCount + vgpr); - Value *valueForNullHs = function->getArg(m_vsEntryRegInfo.sgprCount + vgpr - offsetCorrection); - return builder.CreateSelect(isNullHs, valueForNullHs, valueForNonNullHs, "VgprArgument"); -} - -// ===================================================================================================================== -// Returns true if the fetch shader must fix up the VGPR input registers to account for the way GFX9 provides the LS -// VGPR inputs. -// -// @returns : Returns true if the fetch shader must fix up the VGPR input registers. -bool FetchShader::mustFixLsVgprInput() const { - return (m_fixLsVgprInput && m_vsEntryRegInfo.callingConv == CallingConv::AMDGPU_HS); -} - -// ===================================================================================================================== -// Returns the value of the address of the VertexBufferTable. 
All new code will be place at the start of the -// -// @param call : A call to the VertexBufferTable shader input builtin. -// @returns : The value of the address of the VertexBufferTable. -Value *FetchShader::getReplacementForVertexBufferTableBuiltIn(CallInst *call) const { - // Need to extend 32-bit vertex buffer table address to 64 bits. - Function *callerFunction = call->getFunction(); - AddressExtender extender(callerFunction); - Value *highAddr = call->getArgOperand(1); - BuilderBase builder(&*callerFunction->front().getFirstNonPHIOrDbgOrAlloca()); - Argument *vertexBufferTable = callerFunction->getArg(m_vsEntryRegInfo.vertexBufferTable); - return extender.extend(vertexBufferTable, highAddr, call->getType(), builder); -} - -// ===================================================================================================================== -// Create module with function for the fetch shader. On return, the function contains only the code to copy the -// wave dispatch SGPRs and VGPRs to the return value. -Function *FetchShader::createFetchFunc() { - // Create the module - Module *module = new Module("fetchShader", getContext()); - TargetMachine *targetMachine = m_lgcContext->getTargetMachine(); - module->setTargetTriple(targetMachine->getTargetTriple().getTriple()); - module->setDataLayout(targetMachine->createDataLayout()); - - // Get the function type. Its inputs are the wave dispatch SGPRs and VGPRs. Its return type is a struct - // containing the wave dispatch SGPRs and VGPRs, plus the fetched values in VGPRs. In the return type struct, - // VGPR values must be FP so the back-end puts them into VGPRs; we do the same for the inputs for symmetry. - SmallVector types; - types.append(m_vsEntryRegInfo.sgprCount, Type::getInt32Ty(getContext())); - types.append(m_vsEntryRegInfo.vgprCount, Type::getFloatTy(getContext())); - for (const auto &fetch : m_fetches) - types.push_back(getVgprTy(fetch.ty)); - Type *retTy = StructType::get(getContext(), types); - auto entryTys = ArrayRef(types).slice(0, m_vsEntryRegInfo.sgprCount + m_vsEntryRegInfo.vgprCount); - auto funcTy = FunctionType::get(retTy, entryTys, false); - - // Create the function. Mark SGPR inputs as "inreg". - Function *func = Function::Create(funcTy, GlobalValue::ExternalLinkage, getGlueShaderName(), module); - func->setCallingConv(m_vsEntryRegInfo.callingConv); - for (unsigned i = 0; i != m_vsEntryRegInfo.sgprCount + m_vsEntryRegInfo.vgprCount; ++i) { - Argument *arg = func->getArg(i); - if (i < m_vsEntryRegInfo.sgprCount) - arg->addAttr(Attribute::InReg); - arg->addAttr(Attribute::NoUndef); - } - - // Add mnemonic names to input args. - if (m_vsEntryRegInfo.callingConv == CallingConv::AMDGPU_HS) - func->getArg(LsHsSysValueMergedWaveInfo)->setName("MergedWaveInfo"); - func->getArg(m_vsEntryRegInfo.vertexBufferTable)->setName("VertexBufferTable"); - func->getArg(m_vsEntryRegInfo.baseVertex)->setName("BaseVertex"); - func->getArg(m_vsEntryRegInfo.baseInstance)->setName("BaseInstance"); - func->getArg(m_vsEntryRegInfo.sgprCount + m_vsEntryRegInfo.vertexId)->setName("VertexId"); - func->getArg(m_vsEntryRegInfo.sgprCount + m_vsEntryRegInfo.instanceId)->setName("InstanceId"); - - setShaderStage(func, ShaderStage::Vertex); - - BasicBlock *block = BasicBlock::Create(func->getContext(), "", func); - BuilderBase builder(block); - if (m_vsEntryRegInfo.callingConv == CallingConv::AMDGPU_HS || - m_vsEntryRegInfo.callingConv == CallingConv::AMDGPU_GS) { - // The VS is the first half of a merged shader, LS-HS or ES-GS. 
This fetch shader needs to include code - // to enable the correct lanes for the vertices. It happens that LS vertex count in LS-HS and ES vertex - // count in ES-GS are in the same place: the low 8 bits of s3. - constexpr unsigned mergedWaveInfoSgpr = 3; - builder.CreateIntrinsic(Intrinsic::amdgcn_init_exec_from_input, {}, - {func->getArg(mergedWaveInfoSgpr), builder.getInt32(0)}); - } - - // Copy the wave dispatch SGPRs and VGPRs from inputs to outputs. - builder.SetInsertPoint(&func->back()); - Value *retVal = PoisonValue::get(retTy); - for (unsigned i = 0; i != m_vsEntryRegInfo.sgprCount + m_vsEntryRegInfo.vgprCount; ++i) - retVal = builder.CreateInsertValue(retVal, func->getArg(i), i); - builder.CreateRet(retVal); - - AttrBuilder attribBuilder(func->getContext()); - if (m_pipelineState->getTargetInfo().getGfxIpVersion().major >= 10) { - const unsigned waveSize = m_pipelineState->getShaderWaveSize(ShaderStage::Vertex); - attribBuilder.addAttribute("target-features", ",+wavefrontsize" + std::to_string(waveSize)); // Set wavefront size - } - func->addFnAttrs(attribBuilder); - - return func; -} diff --git a/lgc/elfLinker/FetchShader.h b/lgc/elfLinker/FetchShader.h deleted file mode 100644 index 778303059f..0000000000 --- a/lgc/elfLinker/FetchShader.h +++ /dev/null @@ -1,101 +0,0 @@ -/* - *********************************************************************************************************************** - * - * Copyright (c) 2020-2024 Advanced Micro Devices, Inc. All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - **********************************************************************************************************************/ - -/** - *********************************************************************************************************************** - * @file FetchShader.h - * @brief LGC header file: The class to generate the fetch shader used when linking a pipeline. 
- *********************************************************************************************************************** - */ -#pragma once - -#include "GlueShader.h" -#include "lgc/state/PalMetadata.h" -#include "lgc/state/PipelineState.h" -#include "lgc/util/BuilderBase.h" - -namespace lgc { - -class LgcContext; - -// ===================================================================================================================== -// A fetch shader -class FetchShader : public GlueShader { -public: - FetchShader(PipelineState *pipelineState, llvm::ArrayRef fetches, - const VsEntryRegInfo &vsEntryRegInfo); - - // Get the string for this glue shader. This is some encoding or hash of the inputs to the create*Shader function - // that the front-end client can use as a cache key to avoid compiling the same glue shader more than once. - llvm::StringRef getString() override; - - // Get the symbol name of the main shader that this glue shader is prolog or epilog for. - llvm::StringRef getMainShaderName() override; - - // Get the symbol name of the glue shader. - llvm::StringRef getGlueShaderName() override { - return getEntryPointName(m_vsEntryRegInfo.callingConv, /*isFetchlessVs=*/false); - } - - // Get whether this glue shader is a prolog (rather than epilog) for its main shader. - bool isProlog() override { return true; } - - // Get the name of this glue shader. - llvm::StringRef getName() const override { return "fetch shader"; } - - // No PAL metadata entries need updating for the fetch shader. - void updatePalMetadata(PalMetadata &) override { return; } - -protected: - // Generate the glue shader to IR module - llvm::Module *generate() override; - -private: - llvm::Function *createFetchFunc(); - void generateFetchShaderBody(llvm::Function *fetchFunc); - - void replaceShaderInputBuiltInFunctions(llvm::Function *fetchFunc) const; - llvm::Value *getReplacementForVertexBufferTableBuiltIn(llvm::CallInst *call) const; - llvm::Value *getReplacementForInputBuiltIn(llvm::CallInst *call) const; - llvm::Value *getReplacementForVertexIdBuiltIn(llvm::CallInst *call) const; - llvm::Value *getReplacementForInstanceIdBuiltIn(llvm::CallInst *call) const; - llvm::Value *getVgprArgumentAsAnInt32(unsigned vgpr, llvm::Function *function) const; - llvm::Value *getVpgrArgument(unsigned vgpr, BuilderBase &builder) const; - bool mustFixLsVgprInput() const; - - // The information stored here is all that is needed to generate the fetch shader. We deliberately do not - // have access to PipelineState, so we can hash the information here and let the front-end use it as the - // key for a cache of glue shaders. - llvm::SmallVector m_fetches; - VsEntryRegInfo m_vsEntryRegInfo; - llvm::SmallVector m_fetchDescriptions; - // The encoded or hashed (in some way) single string version of the above. - std::string m_shaderString; - - // True if the fetch shader must work around the hardware sometimes shifting the vgpr inputs by two. 
- bool m_fixLsVgprInput = false; -}; - -} // namespace lgc diff --git a/lgc/elfLinker/GlueShader.cpp b/lgc/elfLinker/GlueShader.cpp index c3d4ca240e..a51a6e203d 100644 --- a/lgc/elfLinker/GlueShader.cpp +++ b/lgc/elfLinker/GlueShader.cpp @@ -31,7 +31,6 @@ */ #include "GlueShader.h" #include "ColorExportShader.h" -#include "FetchShader.h" #include "NullFragmentShader.h" #include "lgc/state/PassManagerCache.h" #include "llvm-dialects/Dialect/Dialect.h" @@ -61,14 +60,6 @@ void GlueShader::compile(raw_pwrite_stream &outStream) { m_lgcContext->getPassManagerCache()->resetStream(); } -// ===================================================================================================================== -// Create a fetch shader object -std::unique_ptr GlueShader::createFetchShader(PipelineState *pipelineState, - ArrayRef fetches, - const VsEntryRegInfo &vsEntryRegInfo) { - return std::make_unique(pipelineState, fetches, vsEntryRegInfo); -} - // ===================================================================================================================== // Create a color export shader object std::unique_ptr GlueShader::createColorExportShader(PipelineState *pipelineState, diff --git a/lgc/elfLinker/GlueShader.h b/lgc/elfLinker/GlueShader.h index 5e00bd6bdc..4bb2a42c15 100644 --- a/lgc/elfLinker/GlueShader.h +++ b/lgc/elfLinker/GlueShader.h @@ -45,10 +45,6 @@ class GlueShader { public: virtual ~GlueShader() {} - // Create a fetch shader - static std::unique_ptr createFetchShader(PipelineState *pipelineState, - llvm::ArrayRef fetches, - const VsEntryRegInfo &vsEntryRegInfo); // Create a color export shader static std::unique_ptr createColorExportShader(PipelineState *pipelineState, llvm::ArrayRef exports); diff --git a/lgc/elfLinker/NullFragmentShader.cpp b/lgc/elfLinker/NullFragmentShader.cpp index 73af575fb9..9d763dead8 100644 --- a/lgc/elfLinker/NullFragmentShader.cpp +++ b/lgc/elfLinker/NullFragmentShader.cpp @@ -44,23 +44,10 @@ using namespace llvm; // @returns : The module containing the null fragment shader. Module *NullFragmentShader::generate() { Module *module = generateEmptyModule(); - Function *entryPoint = FragColorExport::generateNullFragmentShader(*module, m_pipelineState, getGlueShaderName()); - addDummyExportIfNecessary(entryPoint); + FragColorExport::generateNullFragmentShader(*module, m_pipelineState, getGlueShaderName()); return module; } -// ===================================================================================================================== -// Adds a dummy export to the entry point if it is needed. -// -// @param [in/out] entryPoint : The function in which to add the dummy export. -void NullFragmentShader::addDummyExportIfNecessary(Function *entryPoint) const { - if (m_lgcContext->getTargetInfo().getGfxIpVersion().major < 10) { - auto ret = cast(entryPoint->back().getTerminator()); - BuilderBase builder(ret); - FragColorExport::addDummyExport(builder); - } -} - // ===================================================================================================================== // Creates an empty module to be used for generating the null fragment shader. 
// diff --git a/lgc/elfLinker/NullFragmentShader.h b/lgc/elfLinker/NullFragmentShader.h index f16ed4fd10..f7033469bb 100644 --- a/lgc/elfLinker/NullFragmentShader.h +++ b/lgc/elfLinker/NullFragmentShader.h @@ -69,7 +69,6 @@ class NullFragmentShader : public GlueShader { protected: llvm::Module *generate() override; llvm::Module *generateEmptyModule() const; - void addDummyExportIfNecessary(llvm::Function *entryPoint) const; }; } // namespace lgc diff --git a/lgc/include/lgc/builder/BuilderImpl.h b/lgc/include/lgc/builder/BuilderImpl.h index b28856e159..aefb6c4eeb 100644 --- a/lgc/include/lgc/builder/BuilderImpl.h +++ b/lgc/include/lgc/builder/BuilderImpl.h @@ -413,9 +413,6 @@ class BuilderImpl : public BuilderDefs { llvm::Value *imageDesc, llvm::Value *coord, llvm::Value *inputValue, llvm::Value *comparatorValue, const llvm::Twine &instName = ""); - // Change 1D or 1DArray dimension to 2D or 2DArray if needed as a workaround on GFX9+ - unsigned change1DTo2DIfNeeded(unsigned dim); - // Prepare coordinate and explicit derivatives, pushing the separate components into the supplied vectors, and // modifying if necessary. // Returns possibly modified image dimension. @@ -516,7 +513,7 @@ class BuilderImpl : public BuilderDefs { // Mark usage for a generic (user) input or output void markGenericInputOutputUsage(bool isOutput, unsigned location, unsigned locationCount, InOutInfo &inOutInfo, - llvm::Value *vertexOrPrimIndex, bool isDynLocOffset = false); + bool hasVertexOrPrimIndex, bool directlyMapLocations = false); // Mark interpolation info for FS input. void markInterpolationInfo(InOutInfo &interpInfo); @@ -533,7 +530,7 @@ class BuilderImpl : public BuilderDefs { llvm::Value *index, const llvm::Twine &instName = ""); // Reorder the barycoord - llvm::Value *normalizeBaryCoord(llvm::Value *ijCoord); + llvm::Value *normalizeBaryCoord(InOutInfo inputInfo, llvm::Value *ijCoord); // Get provoking vertex value void getProvokingVertexInfo(llvm::Value **isOne, llvm::Value **isTwo); @@ -647,15 +644,9 @@ class BuilderImpl : public BuilderDefs { // Create a get subgroup size query. llvm::Value *CreateGetSubgroupSize(const llvm::Twine &instName = ""); - // Create a subgroup elect. - llvm::Value *CreateSubgroupElect(const llvm::Twine &instName = ""); - // Create a subgroup all. llvm::Value *CreateSubgroupAll(llvm::Value *const value, const llvm::Twine &instName = ""); - // Create a subgroup any - llvm::Value *CreateSubgroupAny(llvm::Value *const value, const llvm::Twine &instName = ""); - // Create a subgroup all equal. 
llvm::Value *CreateSubgroupAllEqual(llvm::Value *const value, const llvm::Twine &instName = ""); @@ -794,6 +785,8 @@ class BuilderImpl : public BuilderDefs { llvm::Value *const value2); uint16_t getDsSwizzleBitMode(uint8_t xorMask, uint8_t orMask, uint8_t andMask); uint16_t getDsSwizzleQuadMode(uint8_t lane0, uint8_t lane1, uint8_t lane2, uint8_t lane3); + +protected: llvm::Value *createGroupBallot(llvm::Value *const value); llvm::Value *createFindMsb(llvm::Value *const mask); }; diff --git a/lgc/include/lgc/builder/BuilderReplayer.h b/lgc/include/lgc/builder/BuilderReplayer.h index c01761e598..1e113e8308 100644 --- a/lgc/include/lgc/builder/BuilderReplayer.h +++ b/lgc/include/lgc/builder/BuilderReplayer.h @@ -45,8 +45,6 @@ class BuilderReplayer final : public llvm::PassInfoMixin { public: llvm::PreservedAnalyses run(llvm::Module &module, llvm::ModuleAnalysisManager &analysisManager); - bool runImpl(llvm::Module &module, PipelineState *pipelineState); - static llvm::StringRef name() { return "Replay LLPC builder calls"; } private: diff --git a/lgc/include/lgc/patch/FragColorExport.h b/lgc/include/lgc/patch/FragColorExport.h index 24ce910fed..2975286399 100644 --- a/lgc/include/lgc/patch/FragColorExport.h +++ b/lgc/include/lgc/patch/FragColorExport.h @@ -59,11 +59,21 @@ enum class CompSetting : unsigned { // Represents the manager of fragment color export operations. class FragColorExport { public: - FragColorExport(llvm::LLVMContext *context, PipelineState *pipelineState); + // Color export Info + struct Key { + ColorExportState colorExportState; // Color export state + unsigned channelWriteMask[MaxColorTargets]; // Write mask to specify destination channels + unsigned expFmt[MaxColorTargets]; // Export format used for "export" instruction. + unsigned dualExpFmt[2]; // Dual source blend export format. valid if dual source blend is enabled. + unsigned waveSize; // The wave size for fragment. + bool enableFragColor; // Whether to broadcast frag color. 
Only for OGLP + }; + + FragColorExport(LgcContext *context); void generateExportInstructions(llvm::ArrayRef info, llvm::ArrayRef values, bool dummyExport, PalMetadata *palMetadata, BuilderBase &builder, - llvm::Value *dynamicIsDualSource); + llvm::Value *dynamicIsDualSource, const Key &key); static void setDoneFlag(llvm::Value *exportInst, BuilderBase &builder); static llvm::CallInst *addDummyExport(BuilderBase &builder); static llvm::Function *generateNullFragmentShader(llvm::Module &module, PipelineState *pipelineState, @@ -72,11 +82,13 @@ class FragColorExport { llvm::StringRef entryPointName); static void generateNullFragmentShaderBody(llvm::Function *entryPoint); + static Key computeKey(llvm::ArrayRef info, PipelineState *pipelineState); + private: FragColorExport() = delete; FragColorExport(const FragColorExport &) = delete; FragColorExport &operator=(const FragColorExport &) = delete; - void updateColorExportInfoWithBroadCastInfo(llvm::ArrayRef originExpinfo, + void updateColorExportInfoWithBroadCastInfo(const Key &key, llvm::ArrayRef originExpinfo, llvm::SmallVector &outExpinfo, unsigned *pCbShaderMask); llvm::Value *handleColorExportInstructions(llvm::Value *output, unsigned int hwColorExport, BuilderBase &builder, ExportFormat expFmt, const bool signedness, unsigned channelWriteMask, @@ -85,15 +97,14 @@ class FragColorExport { llvm::Value *convertToFloat(llvm::Value *value, bool signedness, BuilderBase &builder) const; llvm::Value *convertToInt(llvm::Value *value, bool signedness, BuilderBase &builder) const; - llvm::Value *dualSourceSwizzle(BuilderBase &builder); + llvm::Value *dualSourceSwizzle(unsigned waveSize, BuilderBase &builder); // Colors to be exported for dual-source-blend llvm::SmallVector m_blendSources[2]; // Number of color channels for dual-source-blend unsigned m_blendSourceChannels; - llvm::LLVMContext *m_context; // LLVM context - PipelineState *m_pipelineState; // The pipeline state + LgcContext *m_lgcContext; }; // The information needed for an export to a hardware color target. 
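The new FragColorExport::Key gathers everything export generation needs from the pipeline (color export state, per-target write masks and export formats, dual-source formats, wave size, and the OGLP broadcast flag), so FragColorExport no longer holds a PipelineState and the result can be keyed for caching. A minimal caller sketch under that assumption, using only the declarations shown above; the names exports, values, lgcContext, palMetadata, builder, and dynamicIsDualSource are placeholders, and the real call sites live in FragColorExport.cpp:

// Hypothetical usage of the refactored interface: derive the key once from the
// pipeline state, then generate the exports without passing PipelineState around.
FragColorExport::Key key = FragColorExport::computeKey(exports, pipelineState);
FragColorExport fragColorExport(lgcContext);
fragColorExport.generateExportInstructions(exports, values, /*dummyExport=*/false, palMetadata,
                                           builder, dynamicIsDualSource, key);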
@@ -109,8 +120,6 @@ class LowerFragColorExport : public llvm::PassInfoMixin { LowerFragColorExport(); llvm::PreservedAnalyses run(llvm::Module &module, llvm::ModuleAnalysisManager &analysisManager); - bool runImpl(llvm::Module &module, PipelineShadersResult &pipelineShaders, PipelineState *pipelineState); - static llvm::StringRef name() { return "Lower fragment color export calls"; } private: diff --git a/lgc/include/lgc/patch/LowerCooperativeMatrix.h b/lgc/include/lgc/patch/LowerCooperativeMatrix.h index 5140cf8570..c67c3103ed 100644 --- a/lgc/include/lgc/patch/LowerCooperativeMatrix.h +++ b/lgc/include/lgc/patch/LowerCooperativeMatrix.h @@ -38,14 +38,18 @@ #include "llvm/IR/Function.h" namespace lgc { + +class CooperativeRowAccLoadOp; +class CooperativeRowAccStoreOp; +class CooperativeRowAccFinalizeModeOp; +class CooperativeRowAccAccumulateModeOp; + // ===================================================================================================================== // Pass to lower coopMatrix calls class LowerCooperativeMatrix : public Patch, public llvm::PassInfoMixin { public: llvm::PreservedAnalyses run(llvm::Module &module, llvm::ModuleAnalysisManager &analysisManager); - bool runImpl(llvm::Module &module, PipelineShadersResult &pipelineShaders, PipelineState *pipelineState); - static llvm::StringRef name() { return "Patch cooperative matrix calls"; } void visitCallInst(llvm::CallInst &callInst); @@ -228,7 +232,33 @@ class LowerCooperativeMatrix : public Patch, public llvm::PassInfoMixin m_coopMatrixCalls; + llvm::SmallVector m_coopRowAccCalls; PipelineState *m_pipelineState = nullptr; PipelineShadersResult *m_pipelineShaders = nullptr; GfxIpVersion m_gfxIp; diff --git a/lgc/include/lgc/patch/LowerGpuRt.h b/lgc/include/lgc/patch/LowerGpuRt.h index 8e857b2f31..a883e29bf2 100644 --- a/lgc/include/lgc/patch/LowerGpuRt.h +++ b/lgc/include/lgc/patch/LowerGpuRt.h @@ -49,6 +49,7 @@ class GpurtGetStaticFlagsOp; class GpurtGetTriangleCompressionModeOp; class GpurtGetFlattenedGroupThreadIdOp; class GpurtFloatWithRoundModeOp; +class GpurtDispatchThreadIdFlatOp; class LowerGpuRt : public llvm::PassInfoMixin { public: @@ -73,6 +74,7 @@ class LowerGpuRt : public llvm::PassInfoMixin { void visitGetTriangleCompressionMode(lgc::GpurtGetTriangleCompressionModeOp &inst); void visitGetFlattenedGroupThreadId(lgc::GpurtGetFlattenedGroupThreadIdOp &inst); void visitFloatWithRoundMode(lgc::GpurtFloatWithRoundModeOp &inst); + void visitGpurtDispatchThreadIdFlatOp(lgc::GpurtDispatchThreadIdFlatOp &inst); llvm::Value *m_stack = nullptr; // Stack array to hold stack value llvm::Type *m_stackTy = nullptr; // Stack type PipelineState *m_pipelineState = nullptr; // Pipeline state diff --git a/lgc/include/lgc/patch/LowerSubgroupOps.h b/lgc/include/lgc/patch/LowerSubgroupOps.h new file mode 100644 index 0000000000..3c257320c0 --- /dev/null +++ b/lgc/include/lgc/patch/LowerSubgroupOps.h @@ -0,0 +1,67 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ +/** + *********************************************************************************************************************** + * @file LowerSubgroupOps.h + * @brief LLPC header file: contains declaration of class lgc::LowerSubgroupOps. + *********************************************************************************************************************** + */ +#pragma once + +#include "compilerutils/TypeLowering.h" +#include "continuations/CpsStackLowering.h" +#include "lgc/LgcCpsDialect.h" +#include "lgc/LgcDialect.h" +#include "lgc/patch/Patch.h" +#include "lgc/patch/ShaderInputs.h" +#include "lgc/state/PipelineShaders.h" +#include "lgc/state/PipelineState.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/IRBuilder.h" +#include + +namespace lgc { + +class SubgroupLoweringBuilder; + +// ===================================================================================================================== +// The lower subgroup ops pass +class LowerSubgroupOps : public Patch, public llvm::PassInfoMixin { +public: + llvm::PreservedAnalyses run(llvm::Module &module, llvm::ModuleAnalysisManager &analysisManager); + + static llvm::StringRef name() { return "Lower subgroup ops"; } + +private: + void replace(llvm::CallInst &old, llvm::Value *op); + + void visitElect(SubgroupElectOp &op); + void visitAny(SubgroupAnyOp &op); + + PipelineState *m_pipelineState = nullptr; + SubgroupLoweringBuilder *m_builder = nullptr; +}; + +} // namespace lgc diff --git a/lgc/include/lgc/patch/PatchCheckShaderCache.h b/lgc/include/lgc/patch/PatchCheckShaderCache.h index f875f53591..eb454f6461 100644 --- a/lgc/include/lgc/patch/PatchCheckShaderCache.h +++ b/lgc/include/lgc/patch/PatchCheckShaderCache.h @@ -46,8 +46,6 @@ class PatchCheckShaderCache : public Patch, public llvm::PassInfoMixin public: llvm::PreservedAnalyses run(llvm::Module &module, llvm::ModuleAnalysisManager &analysisManager); - bool runImpl(llvm::Module &module, PipelineState *pipelineState); - static llvm::StringRef name() { return "Patch attributes when image derivatives dependent on discard"; } }; diff --git a/lgc/include/lgc/patch/PatchImageOpCollect.h b/lgc/include/lgc/patch/PatchImageOpCollect.h index 527610877b..b124ae15da 100644 --- a/lgc/include/lgc/patch/PatchImageOpCollect.h +++ b/lgc/include/lgc/patch/PatchImageOpCollect.h @@ -42,8 
+42,6 @@ class PatchImageOpCollect : public llvm::PassInfoMixin { public: llvm::PreservedAnalyses run(llvm::Module &module, llvm::ModuleAnalysisManager &analysisManager); - bool runImpl(llvm::Module &module, PipelineState *pipelineState); - static llvm::StringRef name() { return "Patch LLVM for image operation collecting"; } }; diff --git a/lgc/include/lgc/patch/PatchInOutImportExport.h b/lgc/include/lgc/patch/PatchInOutImportExport.h index edb1c9af39..383b0e6d8f 100644 --- a/lgc/include/lgc/patch/PatchInOutImportExport.h +++ b/lgc/include/lgc/patch/PatchInOutImportExport.h @@ -49,9 +49,6 @@ class PatchInOutImportExport : public Patch, public llvm::PassInfoMixin &getPostDominatorTree); - static llvm::StringRef name() { return "Patch LLVM for input import and output export operations"; } void visitCallInst(llvm::CallInst &callInst); diff --git a/lgc/include/lgc/patch/PatchInitializeWorkgroupMemory.h b/lgc/include/lgc/patch/PatchInitializeWorkgroupMemory.h index 43f9b47989..1bdce133f1 100644 --- a/lgc/include/lgc/patch/PatchInitializeWorkgroupMemory.h +++ b/lgc/include/lgc/patch/PatchInitializeWorkgroupMemory.h @@ -43,8 +43,6 @@ class PatchInitializeWorkgroupMemory final : public Patch, public llvm::PassInfo public: llvm::PreservedAnalyses run(llvm::Module &module, llvm::ModuleAnalysisManager &analysisManager); - bool runImpl(llvm::Module &module, PipelineShadersResult &pipelineShaders, PipelineState *pipelineState); - static llvm::StringRef name() { return "Patch for initialize workgroup memory"; } private: diff --git a/lgc/include/lgc/patch/PatchInvariantLoads.h b/lgc/include/lgc/patch/PatchInvariantLoads.h index 3146867bbe..b241621c97 100644 --- a/lgc/include/lgc/patch/PatchInvariantLoads.h +++ b/lgc/include/lgc/patch/PatchInvariantLoads.h @@ -42,8 +42,6 @@ class PatchInvariantLoads : public llvm::PassInfoMixin { public: llvm::PreservedAnalyses run(llvm::Function &function, llvm::FunctionAnalysisManager &analysisManager); - bool runImpl(llvm::Function &function, PipelineState *pipelineState); - static llvm::StringRef name() { return "Patch metadata for invariant loads"; } }; diff --git a/lgc/include/lgc/patch/PatchLlvmIrInclusion.h b/lgc/include/lgc/patch/PatchLlvmIrInclusion.h index f805b9c9f2..1a253cabb9 100644 --- a/lgc/include/lgc/patch/PatchLlvmIrInclusion.h +++ b/lgc/include/lgc/patch/PatchLlvmIrInclusion.h @@ -41,8 +41,6 @@ class PatchLlvmIrInclusion : public Patch, public llvm::PassInfoMixin, llvm::PreservedAnalyses run(llvm::Function &function, llvm::FunctionAnalysisManager &analysisManager); - bool runImpl(llvm::Function &function, PipelineState *pipelineState); - static llvm::StringRef name() { return "Patch LLVM for load scalarizer optimization"; } void visitLoadInst(llvm::LoadInst &loadInst); diff --git a/lgc/include/lgc/patch/PatchLoopMetadata.h b/lgc/include/lgc/patch/PatchLoopMetadata.h index b698ab5721..b547dc7910 100644 --- a/lgc/include/lgc/patch/PatchLoopMetadata.h +++ b/lgc/include/lgc/patch/PatchLoopMetadata.h @@ -45,8 +45,6 @@ class PatchLoopMetadata : public llvm::PassInfoMixin { llvm::PreservedAnalyses run(llvm::Loop &loop, llvm::LoopAnalysisManager &analysisManager, llvm::LoopStandardAnalysisResults &loopAnalysisResults, llvm::LPMUpdater &); - bool runImpl(llvm::Loop &loop, PipelineState *pipelineState); - static llvm::StringRef name() { return "Set or amend metadata to control loop unrolling"; } llvm::MDNode *updateMetadata(llvm::MDNode *loopId, llvm::ArrayRef prefixesToRemove, diff --git a/lgc/include/lgc/patch/PatchPeepholeOpt.h 
b/lgc/include/lgc/patch/PatchPeepholeOpt.h index fa552f0cec..e361c730a6 100644 --- a/lgc/include/lgc/patch/PatchPeepholeOpt.h +++ b/lgc/include/lgc/patch/PatchPeepholeOpt.h @@ -50,8 +50,6 @@ class PatchPeepholeOpt final : public llvm::InstVisitor, public: llvm::PreservedAnalyses run(llvm::Function &function, llvm::FunctionAnalysisManager &analysisManager); - bool runImpl(llvm::Function &function); - static llvm::StringRef name() { return "Patch LLVM for peephole optimizations"; } void visitIntToPtr(llvm::IntToPtrInst &intToPtr); diff --git a/lgc/include/lgc/patch/PatchPreparePipelineAbi.h b/lgc/include/lgc/patch/PatchPreparePipelineAbi.h index 00481ecdce..e567137a12 100644 --- a/lgc/include/lgc/patch/PatchPreparePipelineAbi.h +++ b/lgc/include/lgc/patch/PatchPreparePipelineAbi.h @@ -55,9 +55,6 @@ class PatchPreparePipelineAbi final : public Patch, public llvm::PassInfoMixin

readTessFactors(PipelineState *pipelineState, llvm::Value *relPatchId, diff --git a/lgc/include/lgc/patch/PatchResourceCollect.h b/lgc/include/lgc/patch/PatchResourceCollect.h index 6a07653a54..550465cc36 100644 --- a/lgc/include/lgc/patch/PatchResourceCollect.h +++ b/lgc/include/lgc/patch/PatchResourceCollect.h @@ -56,8 +56,6 @@ class PatchResourceCollect : public Patch, llvm::PreservedAnalyses run(llvm::Module &module, llvm::ModuleAnalysisManager &analysisManager); virtual void visitCallInst(llvm::CallInst &callInst); - bool runImpl(llvm::Module &module, PipelineShadersResult &pipelineShaders, PipelineState *pipelineState); - static llvm::StringRef name() { return "Patch LLVM for resource collecting"; } private: diff --git a/lgc/include/lgc/patch/PatchSetupTargetFeatures.h b/lgc/include/lgc/patch/PatchSetupTargetFeatures.h index ee37ee226c..de716c8561 100644 --- a/lgc/include/lgc/patch/PatchSetupTargetFeatures.h +++ b/lgc/include/lgc/patch/PatchSetupTargetFeatures.h @@ -43,8 +43,6 @@ class PatchSetupTargetFeatures : public Patch, public llvm::PassInfoMixin { public: llvm::PreservedAnalyses run(llvm::Module &module, llvm::ModuleAnalysisManager &analysisManager); - bool runImpl(llvm::Module &module, PipelineState *pipelineState); - static llvm::StringRef name() { return "Lower vertex fetch calls"; } }; diff --git a/lgc/include/lgc/state/AbiUnlinked.h b/lgc/include/lgc/state/AbiUnlinked.h index 1d48b3b3aa..d87437429f 100644 --- a/lgc/include/lgc/state/AbiUnlinked.h +++ b/lgc/include/lgc/state/AbiUnlinked.h @@ -57,7 +57,6 @@ static constexpr char FetchlessLsEntryName[] = "_amdgpu_ls_main_fetchless"; // Metadata names of extra entries in .pipeline for an unlinked shader/part-pipeline namespace PipelineMetadataKey { -static const char VertexInputs[] = ".vertexInputs"; static const char ColorExports[] = ".colorExports"; static const char FragInputMapping1[] = ".fragInputs"; static const char FragInputMapping2[] = ".fragBuiltInInputs"; diff --git a/lgc/include/lgc/state/PalMetadata.h b/lgc/include/lgc/state/PalMetadata.h index 31c5979d85..7c204f9a9e 100644 --- a/lgc/include/lgc/state/PalMetadata.h +++ b/lgc/include/lgc/state/PalMetadata.h @@ -52,14 +52,6 @@ namespace lgc { class PipelineState; -// ===================================================================================================================== -// Struct with the information for one vertex fetch -struct VertexFetchInfo { - unsigned location; - unsigned component; - llvm::Type *ty; -}; - // ===================================================================================================================== // Struct with information on wave dispatch SGPRs and VGPRs for VS, written by getVsEntryRegInfo struct VsEntryRegInfo { @@ -133,15 +125,6 @@ class PalMetadata { // Set a register value in PAL metadata. If the register has a value set already, it gets overwritten. void setRegister(unsigned regNum, unsigned value); - // Store the vertex fetch in PAL metadata for a fetchless vertex shader with shader compilation. - void addVertexFetchInfo(llvm::ArrayRef fetches); - - // Get the count of vertex fetches for a fetchless vertex shader with shader compilation (or 0 otherwise). - unsigned getVertexFetchCount(); - - // Get the vertex fetch information out of PAL metadata - void getVertexFetchInfo(llvm::SmallVectorImpl &fetches); - // Get the VS entry register info. Used by the linker to generate the fetch shader. 
void getVsEntryRegInfo(VsEntryRegInfo ®Info); @@ -266,12 +249,11 @@ class PalMetadata { unsigned getVgprCount(unsigned callingConv); bool isWave32(unsigned callingConv); - PipelineState *m_pipelineState; // PipelineState - llvm::msgpack::Document *m_document; // The MsgPack document - llvm::msgpack::MapDocNode m_pipelineNode; // MsgPack map node for amdpal.pipelines[0] - llvm::msgpack::MapDocNode m_registers; // MsgPack map node for amdpal.pipelines[0].registers - llvm::msgpack::ArrayDocNode m_vertexInputs; // MsgPack map node for amdpal.pipelines[0].vertexInputs - llvm::msgpack::DocNode m_colorExports; // MsgPack map node for amdpal.pipelines[0].colorExports + PipelineState *m_pipelineState; // PipelineState + llvm::msgpack::Document *m_document; // The MsgPack document + llvm::msgpack::MapDocNode m_pipelineNode; // MsgPack map node for amdpal.pipelines[0] + llvm::msgpack::MapDocNode m_registers; // MsgPack map node for amdpal.pipelines[0].registers + llvm::msgpack::DocNode m_colorExports; // MsgPack map node for amdpal.pipelines[0].colorExports // Mapping from ShaderStageEnum to SPI user data register start, allowing for merged shaders and NGG. unsigned m_userDataRegMapping[ShaderStage::CountInternal] = {}; llvm::msgpack::DocNode *m_userDataLimit; // Maximum so far number of user data dwords used diff --git a/lgc/include/lgc/state/PipelineShaders.h b/lgc/include/lgc/state/PipelineShaders.h index 800a4c4ec8..6c5fb59af7 100644 --- a/lgc/include/lgc/state/PipelineShaders.h +++ b/lgc/include/lgc/state/PipelineShaders.h @@ -56,7 +56,6 @@ class PipelineShaders : public llvm::AnalysisInfoMixin { public: using Result = PipelineShadersResult; PipelineShadersResult run(llvm::Module &module, llvm::ModuleAnalysisManager &); - PipelineShadersResult runImpl(llvm::Module &module); static llvm::AnalysisKey Key; }; diff --git a/lgc/include/lgc/state/PipelineState.h b/lgc/include/lgc/state/PipelineState.h index 26b806677a..74efa28d8b 100644 --- a/lgc/include/lgc/state/PipelineState.h +++ b/lgc/include/lgc/state/PipelineState.h @@ -290,7 +290,7 @@ class PipelineState final : public Pipeline { // Accessors for color export state const ColorExportFormat &getColorExportFormat(unsigned location, bool isDynamicDsBlend = false); const bool hasColorExportFormats() { return !m_colorExportFormats.empty(); } - const ColorExportState &getColorExportState() { return m_colorExportState; } + const ColorExportState &getColorExportState() const { return m_colorExportState; } // Accessors for pipeline state unsigned getDeviceIndex() const { return m_deviceIndex; } @@ -658,7 +658,6 @@ class PipelineStateWrapper : public llvm::AnalysisInfoMixin { public: llvm::PreservedAnalyses run(llvm::Module &module, llvm::ModuleAnalysisManager &analysisManager); - bool runImpl(llvm::Module &module, PipelineState *pipelineState); static llvm::StringRef name() { return "LLPC pipeline state clearer"; } }; diff --git a/lgc/include/lgc/state/TargetInfo.h b/lgc/include/lgc/state/TargetInfo.h index caee7e9ab5..555346582d 100644 --- a/lgc/include/lgc/state/TargetInfo.h +++ b/lgc/include/lgc/state/TargetInfo.h @@ -88,17 +88,6 @@ struct GpuProperty { // Contains flags for all of the hardware workarounds which affect pipeline compilation. 
struct WorkaroundFlags { - union { - struct { - unsigned fixCacheLineStraddling : 1; - unsigned fixLsVgprInput : 1; - unsigned shaderImageGatherInstFix : 1; - unsigned treat1dImagesAs2d : 1; - unsigned reserved : 28; - }; - unsigned u32All; - } gfx9; - union { struct { unsigned waTessFactorBufferSizeLimitGeUtcl1Underflow : 1; diff --git a/lgc/interface/lgc/Builder.h b/lgc/interface/lgc/Builder.h index d4a31b7835..e85e5589bb 100644 --- a/lgc/interface/lgc/Builder.h +++ b/lgc/interface/lgc/Builder.h @@ -104,6 +104,9 @@ class InOutInfo { bool isPerPrimitive() const { return m_data.bits.perPrimitive; } void setPerPrimitive(bool perPrimitive = true) { m_data.bits.perPrimitive = perPrimitive; } + bool isProvokingVertexModeDisabled() const { return m_data.bits.disableProvokingVertexMode; } + void disableProvokingVertexMode(bool disable = true) { m_data.bits.disableProvokingVertexMode = disable; } + unsigned getComponent() const { return m_data.bits.component; } void setComponent(unsigned component) { assert(component < 4); // Valid component offsets are 0~3 @@ -125,6 +128,7 @@ // whole array or of an element with a variable index. unsigned perPrimitive : 1; // Mesh shader output: whether it is a per-primitive output unsigned component : 2; // Component offset, specifying which components within a location is consumed + unsigned disableProvokingVertexMode : 1; // Disable the provoking vertex mode } bits; unsigned u32All; } m_data; @@ -1399,23 +1403,12 @@ class Builder : public BuilderDefs { // @param instName : Name to give instruction(s) llvm::Value *CreateGetSubgroupSize(const llvm::Twine &instName = ""); - // Create a subgroup elect. - // - // @param instName : Name to give instruction(s) - llvm::Value *CreateSubgroupElect(const llvm::Twine &instName = ""); - // Create a subgroup all. // // @param value : The value to compare // @param instName : Name to give instruction(s) llvm::Value *CreateSubgroupAll(llvm::Value *const value, const llvm::Twine &instName = ""); - // Create a subgroup any - // - // @param value : The value to compare - // @param instName : Name to give instruction(s) - llvm::Value *CreateSubgroupAny(llvm::Value *const value, const llvm::Twine &instName = ""); - // Create a subgroup all equal. // // @param value : The value to compare diff --git a/lgc/interface/lgc/ElfLinker.h b/lgc/interface/lgc/ElfLinker.h index 5826a70f42..e9114a17f2 100644 --- a/lgc/interface/lgc/ElfLinker.h +++ b/lgc/interface/lgc/ElfLinker.h @@ -80,7 +80,8 @@ class ElfLinker { // // @param exports : Fragment export info // @param enableKill : Whether this fragment shader has kill enabled. - virtual llvm::StringRef buildColorExportShader(llvm::ArrayRef exports, bool enableKill) = 0; + // @returns : Hash string for the color export shader; it will be used as the cache key. + virtual llvm::StringRef createColorExportShader(llvm::ArrayRef exports, bool enableKill) = 0; // Add a blob for a particular chunk of glue code, typically retrieved from a cache. The blob is not copied, // and remains in use until the first of the link completing or the ElfLinker's parent Pipeline being destroyed.
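The CreateSubgroupElect and CreateSubgroupAny entry points removed from Builder.h above are superseded by the SubgroupElectOp and SubgroupAnyOp dialect ops defined in the LgcDialect.td diff below and lowered by the new LowerSubgroupOps pass. A rough sketch of how a front end might emit them instead of the removed methods, assuming the usual llvm-dialects create<> helper on the lgc builder (the actual emission sites are not shown in this patch):

// Hypothetical emission of the new dialect ops in place of the removed Builder methods.
llvm::Value *elected = builder.create<lgc::SubgroupElectOp>();       // i1: true only in the lowest active lane
llvm::Value *anySet = builder.create<lgc::SubgroupAnyOp>(condition); // i1: true if condition holds in any active lane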
diff --git a/lgc/interface/lgc/LgcDialect.td b/lgc/interface/lgc/LgcDialect.td index 10b299cadc..1f5759f6cf 100644 --- a/lgc/interface/lgc/LgcDialect.td +++ b/lgc/interface/lgc/LgcDialect.td @@ -154,7 +154,7 @@ def TaskPayloadPtrOp : LgcOp<"task.payload.ptr", [Memory<[]>, WillReturn]> { } def EmitMeshTasksOp : LgcOp<"emit.mesh.tasks", [Memory<[]>]> { - let arguments = (ins I32:$groupCountX, I32:$groupCountY, I32:$groupCountZ); + let arguments = (ins I32:$group_count_x, I32:$group_count_y, I32:$group_count_z); let results = (outs); let summary = "emit the current values of all per-task output variables to the current task output"; @@ -162,14 +162,14 @@ def EmitMeshTasksOp : LgcOp<"emit.mesh.tasks", [Memory<[]>]> { In the task shader, emit the current values of all per-task output variables to the current task output by specifying the group count XYZ of the launched child mesh tasks. - `groupCountX` is X dimension of the launched child mesh tasks. - `groupCountY` is Y dimension of the launched child mesh tasks. - `groupCountZ` is Z dimension of the launched child mesh tasks. + `group_count_x` is X dimension of the launched child mesh tasks. + `group_count_y` is Y dimension of the launched child mesh tasks. + `group_count_z` is Z dimension of the launched child mesh tasks. }]; } def SetMeshOutputsOp : LgcOp<"set.mesh.outputs", [Memory<[]>]> { - let arguments = (ins I32:$vertexCount, I32:$primitiveCount); + let arguments = (ins I32:$vertex_count, I32:$primitive_count); let results = (outs); let summary = "set the actual output size of the primitives and vertices that the mesh shader workgroup will emit"; @@ -177,34 +177,34 @@ def SetMeshOutputsOp : LgcOp<"set.mesh.outputs", [Memory<[]>]> { In the mesh shader, set the actual output size of the primitives and vertices that the mesh shader workgroup will emit upon completion. - `vertexCount` is the actual output size of the vertices. - `primitiveCount` is the actual output size of the primitives. + `vertex_count` is the actual output size of the vertices. + `primitive_count` is the actual output size of the primitives. }]; } def SetMeshPrimitiveIndicesOp : LgcOp<"set.mesh.primitive.indices", [Memory<[]>]> { - let arguments = (ins I32:$primitiveIndex, (ScalarOrFixedVector I32):$primitiveIndices); + let arguments = (ins I32:$primitive_index, (ScalarOrFixedVector I32):$primitive_indices); let results = (outs); let summary = "set primitive indices for mesh shader"; let description = [{ In the mesh shader, set primitive indices by forming primitive connectivity data and writing it to LDS. - `primitiveIndex` is the primitive index specifying which primitive to set. - `primitiveIndices` are all vertex index values that are used to form this primitive. + `primitive_index` is the primitive index specifying which primitive to set. + `primitive_indices` are all vertex index values that are used to form this primitive. }]; } def SetMeshPrimitiveCulledOp : LgcOp<"set.mesh.primitive.culled", [Memory<[]>]> { - let arguments = (ins I32:$primitiveIndex, I1:$isCulled); + let arguments = (ins I32:$primitive_index, I1:$is_culled); let results = (outs); let summary = "set primitive culled state for mesh shader"; let description = [{ In the mesh shader, set primitive culled state by writing the null primitive flag to LDS. - `primitiveIndex` is the primitive index specifying which primitive to set. - `isCulled` is a boolean flag indicating whether this primitive is culled. + `primitive_index` is the primitive index specifying which primitive to set. 
+ `is_culled` is a boolean flag indicating whether this primitive is culled. }]; } @@ -223,41 +223,41 @@ def GetMeshBuiltinInputOp : LgcOp<"get.mesh.builtin.input", [Memory<[]>, WillRet } def WriteMeshVertexOutputOp : LgcOp<"write.mesh.vertex.output", [Memory<[]>]> { - let arguments = (ins I32:$outputOffset, I32:$vertexIndex, value:$outputValue); + let arguments = (ins I32:$output_offset, I32:$vertex_index, value:$output_value); let results = (outs); let summary = "Write mesh shader vertex outputs"; let description = [{ In the mesh shader, write mesh shader vertex outputs to LDS. - `outputOffset` is the relative offset of this output (in dwords) within all outputs of the indexed vertex. - `vertexIndex` is the vertex index specifying which vertex to write. - `outputValue` is the output value to write. + `output_offset` is the relative offset of this output (in dwords) within all outputs of the indexed vertex. + `vertex_index` is the vertex index specifying which vertex to write. + `output_value` is the output value to write. }]; } def WriteMeshPrimitiveOutputOp : LgcOp<"write.mesh.primitive.output", [Memory<[]>]> { - let arguments = (ins I32:$outputOffset, I32:$primitiveIndex, value:$outputValue); + let arguments = (ins I32:$output_offset, I32:$primitive_index, value:$output_value); let results = (outs); let summary = "Write mesh shader primitive outputs"; let description = [{ In the mesh shader, write mesh shader primitive outputs to LDS. - `outputOffset` is the relative offset of this output (in dwords) within all outputs of the indexed primitive. - `primitiveIndex` is the primitive index specifying which primitive to write. - `outputValue` is the output value to write. + `output_offset` is the relative offset of this output (in dwords) within all outputs of the indexed primitive. + `primitive_index` is the primitive index specifying which primitive to write. + `output_value` is the output value to write. }]; } def GenericLocationOp : OpClass { - let arguments = (ins AttrI1:$perPrimitive, AttrI32:$location, I32:$locOffset, I32:$elemIdx, I32:$arrayIndex); + let arguments = (ins AttrI1:$per_primitive, AttrI32:$location, I32:$loc_offset, I32:$elem_idx, I32:$array_index); let summary = "family of operations that reference generic shader I/O locations"; let description = [{ Generic (graphics) shader I/O interfaces are addressed in terms of locations. Locations are 4-element vectors with - 32-bit elements. Within an interface, the location vectors are indexed by `location + locOffset`. The elements of - the vector are indexed by `elemIdx`. + 32-bit elements. Within an interface, the location vectors are indexed by `location + loc_offset`. The elements of + the vector are indexed by `elem_idx`. For some shader stages, the interface is inherently an array. For example, GS inputs are an array of per-vertex inputs, where the size of the array depends on the input primitive type (1 for points, 2 for lines, etc.). @@ -265,8 +265,8 @@ def GenericLocationOp : OpClass { Some shader stages have both array and non-array interfaces. Specifically, TCS outputs and TES inputs have per-patch locations (non-array) and per-control-point locations (array with one entry per control point). - For array interfaces, `arrayIndex` is used to index into the array, e.g. arrayIndex is the control point index (in - TCS outputs and TES inputs) or the vertex index (in GS inputs). For non-array interfaces, `arrayIndex` is ignored + For array interfaces, `array_index` is used to index into the array, e.g. 
array_index is the control point index (in + TCS outputs and TES inputs) or the vertex index (in GS inputs). For non-array interfaces, `array_index` is ignored entirely and is recommended to be set to `poison`. Some shader stages have a notion of inputs or outputs that are "per primitive": @@ -276,7 +276,7 @@ def GenericLocationOp : OpClass { - PS inputs: per-primitive vs. per-vertex/interpolated (note: old-school flat-interpolated inputs are considered to be interpolated) - TODO: Tease out the exact rules for when `locOffset` is allowed to be non-0. For inputs, this is originally only + TODO: Tease out the exact rules for when `loc_offset` is allowed to be non-0. For inputs, this is originally only TCS, TES, and PS in special cases. }]; } @@ -317,7 +317,7 @@ def OutputImportGenericOp : LgcOp<"output.import.generic", [Memory<[(read)]>, Wi def InputImportInterpolatedOp : LgcOp<"input.import.interpolated", [Memory<[]>, WillReturn]> { let superclass = GenericLocationOp; - let arguments = (ins GenericLocationOp, AttrI32:$interpMode, value:$interpValue); + let arguments = (ins GenericLocationOp, AttrI32:$interp_mode, value:$interp_value); let results = (outs value:$result); let defaultBuilderHasExplicitResultType = true; @@ -326,11 +326,11 @@ def InputImportInterpolatedOp : LgcOp<"input.import.interpolated", [Memory<[]>, let description = [{ Only used in PS for per-vertex/interpolated inputs. Use `input.import.generic` for per-primitive inputs. - `interpMode` is one of: + `interp_mode` is one of: - - InterpModeSmooth for interpolation using the `<2 x float>` barycentrics in `interpValue` - - InterpModeFlat for flat shading; `interpValue` is ignored and is recommended to be `poison` - - InterpModeCustom to retrieve the attribute of the vertex with the `i32` index `interpValue` (which must be 0, 1, + - InterpModeSmooth for interpolation using the `<2 x float>` barycentrics in `interp_value` + - InterpModeFlat for flat shading; `interp_value` is ignored and is recommended to be `poison` + - InterpModeCustom to retrieve the attribute of the vertex with the `i32` index `interp_value` (which must be 0, 1, or 2). The raw HW vertex index is used, which may be different from the API vertex index; it is up to the user of this operation to map between HW and API. }]; @@ -386,3 +386,100 @@ def GroupMemcpyOp : LgcOp<"group.memcpy", [Memory<[]>]> { This operation must only occur in control flow that is uniform for the relevant scope. }]; } + +def SubgroupElectOp : LgcOp<"subgroup.elect", [NoUnwind, Convergent]> { + let arguments = (ins); + let results = (outs I1:$result); + + let summary = "subgroupElect"; + let description = [{ + This is true only in the active invocation with the lowest id in the group, + otherwise `result` is false. + + This is used to implement OpGroupNonUniformElect in the SPIR-V reference, + see there for details. + }]; +} + +def SubgroupAnyOp : LgcOp<"subgroup.any", [NoUnwind, Convergent]> { + let arguments = (ins I1:$value); + let results = (outs I1:$result); + + let summary = "subgroupAny"; + let description = [{ + Evaluates `value` for all invocations in the group. `result` will be true if + and only if it evaluates to true for any invocation in the group. + + This is used to implement OpGroupAny and OpGroupNonUniformAny in the SPIR-V + reference, see there for details. 
+ }];
+}
+
+def CooperativeRowAccLoadOp : LgcOp<"cooperative.rowacc.load", [Memory<[]>, WillReturn]> {
+  let arguments = (ins value:$pointer, I32:$stride, AttrI32:$elemType, AttrI32:$memoryAccess);
+  let results = (outs value:$result);
+
+  let defaultBuilderHasExplicitResultType = true;
+
+  let summary = "load cooperative rowacc from memory";
+  let description = [{
+    Load contiguous elements from the specified location in memory.
+
+    Returns the row acc data in finalized mode.
+
+    'stride' is the stride in bytes between the first elements in the source data.
+    'elemType' is the element type of the row acc.
+    'memoryAccess' is the memory operand mask providing the isVolatile/isTemporal/isCoherent flags, i.e. any of
+    Volatile/Aligned/Nontemporal/MakePointerAvailable.
+  }];
+}
+
+def CooperativeRowAccStoreOp : LgcOp<"cooperative.rowacc.store", [Memory<[]>]> {
+  let arguments = (ins value:$pointer, I32:$stride, AttrI32:$elemType, value:$value, AttrI32:$memoryAccess);
+  let results = (outs);
+
+  let summary = "store cooperative rowacc to memory";
+  let description = [{
+    Store contiguous elements to the specified location in memory.
+
+    'stride' is the stride in bytes between the first elements in the source data.
+    'elemType' is the element type of the row acc.
+    'value' is the row acc data; it must be in finalized mode.
+    'memoryAccess' is the memory operand mask providing the isVolatile/isTemporal/isCoherent flags, i.e. any of
+    Volatile/Aligned/Nontemporal/MakePointerAvailable.
+  }];
+}
+
+def CooperativeRowAccAccumulateModeOp : LgcOp<"cooperative.rowacc.accumulatemode", [Memory<[]>, WillReturn]> {
+  let arguments = (ins value:$rowAccValue, AttrI32:$elemType);
+  let results = (outs value:$result);
+
+  let defaultBuilderHasExplicitResultType = true;
+
+  let summary = "change cooperative row acc data mode from finalize mode to accumulate mode";
+  let description = [{
+    Convert the row acc data from finalize mode to accumulate mode.
+
+    Returns the row acc data in accumulate mode.
+
+    'rowAccValue' is the input row acc data; it must be in finalize mode.
+    'elemType' is the element type of the row acc.
+  }];
+}
+
+def CooperativeRowAccFinalizeModeOp : LgcOp<"cooperative.rowacc.finalizemode", [Memory<[]>, WillReturn]> {
+  let arguments = (ins value:$rowAccValue, AttrI32:$elemType);
+  let results = (outs value:$result);
+
+  let defaultBuilderHasExplicitResultType = true;
+
+  let summary = "change cooperative rowacc data mode from accumulate mode to finalize mode";
+  let description = [{
+    Convert the row acc data from accumulate mode to finalize mode.
+
+    Returns the row acc data in finalized mode.
+
+    'rowAccValue' is the input row acc data; it must be in accumulate mode.
+    'elemType' is the element type of the row acc.
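+
+    A typical sequence (illustrative only; the exact textual IR spelling of these ops differs) pairs this op with
+    `cooperative.rowacc.accumulatemode`:
+
+      %acc0 = cooperative.rowacc.load %ptr, %stride, ...       ; finalized mode
+      %acc1 = cooperative.rowacc.accumulatemode %acc0, ...     ; accumulate mode
+      ;; ... accumulate into %acc1 ...
+      %acc2 = cooperative.rowacc.finalizemode %acc1, ...       ; finalized mode
+      cooperative.rowacc.store %ptr, %stride, ..., %acc2, ...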
+ }]; +} diff --git a/lgc/patch/FragColorExport.cpp b/lgc/patch/FragColorExport.cpp index 427a9fb6f9..643e8968a3 100644 --- a/lgc/patch/FragColorExport.cpp +++ b/lgc/patch/FragColorExport.cpp @@ -55,8 +55,7 @@ namespace lgc { // // @param context : LLVM context // @param pipelineState : Pipeline state -FragColorExport::FragColorExport(LLVMContext *context, PipelineState *pipelineState) - : m_context(context), m_pipelineState(pipelineState) { +FragColorExport::FragColorExport(LgcContext *context) : m_lgcContext(context) { } // ===================================================================================================================== @@ -240,7 +239,7 @@ Value *FragColorExport::handleColorExportInstructions(Value *output, unsigned hw } } - if (m_pipelineState->getTargetInfo().getGfxIpVersion().major >= 11) { + if (m_lgcContext->getTargetInfo().getGfxIpVersion().major >= 11) { if (isDualSource) { // Save them for later dual-source-swizzle m_blendSourceChannels = exportTy->isHalfTy() ? (compCount + 1) / 2 : compCount; @@ -262,7 +261,7 @@ Value *FragColorExport::handleColorExportInstructions(Value *output, unsigned hw Value *exportCall = nullptr; - if (exportTy->isHalfTy() && m_pipelineState->getTargetInfo().getGfxIpVersion().major < 11) { + if (exportTy->isHalfTy() && m_lgcContext->getTargetInfo().getGfxIpVersion().major < 11) { // 16-bit export (compressed) Value *args[] = { builder.getInt32(EXP_TARGET_MRT_0 + hwColorExport), // tgt @@ -273,7 +272,7 @@ Value *FragColorExport::handleColorExportInstructions(Value *output, unsigned hw builder.getTrue() // vm }; - exportCall = builder.CreateNamedCall("llvm.amdgcn.exp.compr.v2f16", Type::getVoidTy(*m_context), args, {}); + exportCall = builder.CreateNamedCall("llvm.amdgcn.exp.compr.v2f16", builder.getVoidTy(), args, {}); } else { Value *args[] = { builder.getInt32(EXP_TARGET_MRT_0 + hwColorExport), // tgt @@ -286,7 +285,7 @@ Value *FragColorExport::handleColorExportInstructions(Value *output, unsigned hw builder.getTrue() // vm }; - exportCall = builder.CreateNamedCall("llvm.amdgcn.exp.f32", Type::getVoidTy(*m_context), args, {}); + exportCall = builder.CreateNamedCall("llvm.amdgcn.exp.f32", builder.getVoidTy(), args, {}); } return exportCall; @@ -423,27 +422,14 @@ Value *FragColorExport::convertToInt(Value *value, bool signedness, BuilderBase PreservedAnalyses LowerFragColorExport::run(Module &module, ModuleAnalysisManager &analysisManager) { PipelineState *pipelineState = analysisManager.getResult(module).getPipelineState(); PipelineShadersResult &pipelineShaders = analysisManager.getResult(module); - if (runImpl(module, pipelineShaders, pipelineState)) - return PreservedAnalyses::none(); - return PreservedAnalyses::all(); -} -// ===================================================================================================================== -// Run the lower color export pass on a module -// -// @param [in/out] module : Module -// @param pipelineShaders : Pipeline shaders analysis result -// @param pipelineState : Pipeline state -// @returns : True if the module was modified by the transformation and false otherwise -bool LowerFragColorExport::runImpl(Module &module, PipelineShadersResult &pipelineShaders, - PipelineState *pipelineState) { m_context = &module.getContext(); m_pipelineState = pipelineState; m_resUsage = m_pipelineState->getShaderResourceUsage(ShaderStage::Fragment); Function *fragEntryPoint = pipelineShaders.getEntryPoint(ShaderStage::Fragment); if (!fragEntryPoint) - return false; + return PreservedAnalyses::all(); 
// Find the return instruction as that will be the insertion point for the export instructions. // It is possible that there is no return instruction if there is an infinite loop. See the shaderdb test @@ -456,7 +442,7 @@ bool LowerFragColorExport::runImpl(Module &module, PipelineShadersResult &pipeli } } if (!retInst) - return false; + return PreservedAnalyses::all(); BuilderBase builder(module.getContext()); builder.SetInsertPoint(retInst); @@ -479,15 +465,15 @@ bool LowerFragColorExport::runImpl(Module &module, PipelineShadersResult &pipeli bool willGenerateColorExportShader = m_pipelineState->isUnlinked() && !m_pipelineState->hasColorExportFormats(); if (willGenerateColorExportShader && !m_info.empty()) { createTailJump(fragEntryPoint, builder, dynamicIsDualSource); - return true; + return PreservedAnalyses::none(); } - FragColorExport fragColorExport(m_context, m_pipelineState); - bool dummyExport = - (m_pipelineState->getTargetInfo().getGfxIpVersion().major < 10 || m_resUsage->builtInUsage.fs.discard); + FragColorExport fragColorExport(m_pipelineState->getLgcContext()); + bool dummyExport = m_resUsage->builtInUsage.fs.discard; + FragColorExport::Key key = FragColorExport::computeKey(m_info, m_pipelineState); fragColorExport.generateExportInstructions(m_info, m_exportValues, dummyExport, m_pipelineState->getPalMetadata(), - builder, dynamicIsDualSource); - return !m_info.empty() || dummyExport; + builder, dynamicIsDualSource, key); + return (!m_info.empty() || dummyExport) ? PreservedAnalyses::none() : PreservedAnalyses::all(); } // ===================================================================================================================== @@ -531,7 +517,7 @@ void LowerFragColorExport::updateFragColors(CallInst *callInst, MutableArrayRef< // @param fragEntryPoint : The fragment shader to which we should add the export instructions. // @param builder : The builder object that will be used to create new instructions. void LowerFragColorExport::collectExportInfoForGenericOutputs(Function *fragEntryPoint, BuilderBase &builder) { - std::unique_ptr fragColorExport(new FragColorExport(m_context, m_pipelineState)); + std::unique_ptr fragColorExport(new FragColorExport(m_pipelineState->getLgcContext())); SmallVector colorExports; // Collect all of the exports in the fragment shader @@ -795,9 +781,8 @@ void FragColorExport::setDoneFlag(Value *exportInst, BuilderBase &builder) { // Swizzle the output to MRT0/MRT1 for dual source blend on GFX11+, and return the last export instruction. // // @param builder : The builder object that will be used to create new instructions. 
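+// @param waveSize : Hardware wave size of the fragment shader (32 or 64)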
-Value *FragColorExport::dualSourceSwizzle(BuilderBase &builder) { +Value *FragColorExport::dualSourceSwizzle(unsigned waveSize, BuilderBase &builder) { Value *result0[4], *result1[4]; - unsigned waveSize = m_pipelineState->getShaderWaveSize(ShaderStage::Fragment); auto undefFloat = PoisonValue::get(builder.getFloatTy()); Value *threadId = @@ -854,7 +839,7 @@ Value *FragColorExport::dualSourceSwizzle(BuilderBase &builder) { builder.getFalse(), // done builder.getTrue() // vm }; - builder.CreateNamedCall("llvm.amdgcn.exp.f32", Type::getVoidTy(*m_context), args0, {}); + builder.CreateNamedCall("llvm.amdgcn.exp.f32", builder.getVoidTy(), args0, {}); Value *args1[] = { builder.getInt32(EXP_TARGET_DUAL_SRC_1), // tgt @@ -866,41 +851,41 @@ Value *FragColorExport::dualSourceSwizzle(BuilderBase &builder) { builder.getFalse(), // done builder.getTrue() // vm }; - return builder.CreateNamedCall("llvm.amdgcn.exp.f32", Type::getVoidTy(*m_context), args1, {}); + return builder.CreateNamedCall("llvm.amdgcn.exp.f32", builder.getVoidTy(), args1, {}); } // ===================================================================================================================== // Update the color export information when enableFragColor is set. // -// @param originExpinfo : The original color export information for each color export in no particular order. +// @param key : Color export info. +// @param originExpinfo : The original color export information for each color export in no particular order.// // @param pCbShaderMask: The cbShaderMask after update color export information // @param [out] outExpinfo : The updated color export information when enableFragColor is true. -void FragColorExport::updateColorExportInfoWithBroadCastInfo(ArrayRef originExpinfo, +void FragColorExport::updateColorExportInfoWithBroadCastInfo(const Key &key, ArrayRef originExpinfo, SmallVector &outExpinfo, unsigned *pCbShaderMask) { // As enableFragColor will only be enabled by OGL, so it will not consider on the dualSource cases. SmallVector broadCastInfo; - if (m_pipelineState->getOptions().enableFragColor) { + if (key.enableFragColor) { auto &expInfo = originExpinfo[0]; assert(expInfo.ty != nullptr); for (unsigned location = 0; location < MaxColorTargets; ++location) { - if (m_pipelineState->getColorExportFormat(location).dfmt != BufDataFormatInvalid) + if (key.expFmt[location] != 0) broadCastInfo.push_back({0, location, expInfo.isSigned, expInfo.ty}); } } - outExpinfo = - m_pipelineState->getOptions().enableFragColor ? broadCastInfo : SmallVector(originExpinfo); + outExpinfo = key.enableFragColor ? 
broadCastInfo : SmallVector(originExpinfo); for (auto &exp : outExpinfo) { if (exp.hwColorTarget == MaxColorTargets) continue; - const unsigned channelWriteMask = m_pipelineState->getColorExportFormat(exp.location).channelWriteMask; - unsigned gfxIp = m_pipelineState->getTargetInfo().getGfxIpVersion().major; + const unsigned channelWriteMask = key.channelWriteMask[exp.location]; + unsigned gfxIp = m_lgcContext->getTargetInfo().getGfxIpVersion().major; bool needUpdateMask = false; if (exp.location == 0 || gfxIp > 10) { - needUpdateMask = (m_pipelineState->computeExportFormat(exp.ty, exp.location) != 0) && - (channelWriteMask > 0 || m_pipelineState->getColorExportState().alphaToCoverageEnable); + needUpdateMask = + (key.expFmt[exp.location] != 0) && (channelWriteMask > 0 || key.colorExportState.alphaToCoverageEnable); } else { - needUpdateMask = (m_pipelineState->computeExportFormat(exp.ty, exp.location) != 0) && (channelWriteMask > 0); + needUpdateMask = (key.expFmt[exp.location] != 0) && (channelWriteMask > 0); } if (needUpdateMask) { // For dualSource, the cbShaderMask will only be valid for location=0, other locations setting will be @@ -920,11 +905,12 @@ void FragColorExport::updateColorExportInfoWithBroadCastInfo(ArrayRef info, ArrayRef values, bool dummyExport, PalMetadata *palMetadata, BuilderBase &builder, - Value *dynamicIsDualSource) { + Value *dynamicIsDualSource, const Key &key) { Value *lastExport = nullptr; - unsigned gfxip = m_pipelineState->getTargetInfo().getGfxIpVersion().major; + unsigned gfxip = m_lgcContext->getTargetInfo().getGfxIpVersion().major; // MRTZ export comes first if it exists (this is a HW requirement on gfx11+ and an optional good idea on earlier HW). // We make the assume here that it is also first in the info list. @@ -932,8 +918,8 @@ void FragColorExport::generateExportInstructions(ArrayRef info, unsigned depthMask = info[0].location; // Depth export alpha comes from MRT0.a if there is MRT0.a and A2C is enabled on GFX11+ - Value *alpha = PoisonValue::get(Type::getFloatTy(*m_context)); - if (!dummyExport && gfxip >= 11 && m_pipelineState->getColorExportState().alphaToCoverageEnable) { + Value *alpha = PoisonValue::get(builder.getFloatTy()); + if (!dummyExport && gfxip >= 11 && key.colorExportState.alphaToCoverageEnable) { for (auto &curInfo : info) { if (curInfo.location != 0) continue; @@ -988,23 +974,20 @@ void FragColorExport::generateExportInstructions(ArrayRef info, BasicBlock *dualSourceBlock = nullptr; BasicBlock *normalExportBlock = nullptr; - updateColorExportInfoWithBroadCastInfo(info, finalExpInfo, &cbShaderMask); + updateColorExportInfoWithBroadCastInfo(key, info, finalExpInfo, &cbShaderMask); - if (m_pipelineState->getColorExportState().dualSourceBlendDynamicEnable && (gfxip >= 11)) { + if (key.colorExportState.dualSourceBlendDynamicEnable && (gfxip >= 11)) { // For dynamiceState, whether do dualSourceBlend will depend on the user data. 
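+    // Both the dual-source swizzle block and the normal export block are emitted; at runtime the user data value
+    // selects which of the two executes.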
- dualSourceBlock = BasicBlock::Create(*m_context, "dualSourceSwizzle", originFunc); - normalExportBlock = BasicBlock::Create(*m_context, "normalExport", originFunc); - Value *staticDualEnable = builder.getInt32(m_pipelineState->getColorExportState().dualSourceBlendEnable); - Value *isDualSource = builder.CreateOr(dynamicIsDualSource, - builder.CreateAnd(staticDualEnable, builder.CreateNot(dynamicIsDualSource))); - isDualSource = builder.CreateICmpNE(dynamicIsDualSource, builder.getInt32(0)); + dualSourceBlock = BasicBlock::Create(m_lgcContext->getContext(), "dualSourceSwizzle", originFunc); + normalExportBlock = BasicBlock::Create(m_lgcContext->getContext(), "normalExport", originFunc); + Value *isDualSource = builder.CreateICmpNE(dynamicIsDualSource, builder.getInt32(0)); builder.CreateCondBr(isDualSource, dualSourceBlock, normalExportBlock); } else { if (retInst) { retInst->eraseFromParent(); retInst = nullptr; } - if (m_pipelineState->getColorExportState().dualSourceBlendEnable && (gfxip >= 11)) + if (key.colorExportState.dualSourceBlendEnable && (gfxip >= 11)) // For only-static case, it will depend on dualSourceBlendEnable flag to do dualSourceBlend. dualSourceBlock = builder.GetInsertBlock(); else @@ -1013,37 +996,34 @@ void FragColorExport::generateExportInstructions(ArrayRef info, } // Construct ".dualSourceSwizzle" Block, only construct when the dynamicEnable is on and staticValue is true. - if ((m_pipelineState->getColorExportState().dualSourceBlendDynamicEnable || - m_pipelineState->getColorExportState().dualSourceBlendEnable) && + if ((key.colorExportState.dualSourceBlendDynamicEnable || key.colorExportState.dualSourceBlendEnable) && (gfxip >= 11)) { builder.SetInsertPoint(dualSourceBlock); - unsigned hwColorExport = 0; - for (unsigned location = 0; location < MaxColorTargets; ++location) { - auto infoIt = llvm::find_if( - finalExpInfo, [&](const ColorExportInfo &finalExpInfo) { return finalExpInfo.location == location; }); - if (infoIt == finalExpInfo.end()) - continue; - assert(infoIt->hwColorTarget < MaxColorTargets); - auto expFmt = static_cast(m_pipelineState->computeExportFormat(infoIt->ty, location, true)); - const unsigned channelWriteMask = m_pipelineState->getColorExportFormat(location, true).channelWriteMask; - bool needExpInst = (expFmt != EXP_FORMAT_ZERO) && - (channelWriteMask > 0 || m_pipelineState->getColorExportState().alphaToCoverageEnable); - if (needExpInst) { - // Collect info for dualSourceBlend and save then in m_blendSources, so set the last parameter=true; - handleColorExportInstructions(values[infoIt->hwColorTarget], hwColorExport, builder, expFmt, infoIt->isSigned, - channelWriteMask, true); - finalExportFormats.push_back(expFmt); - ++hwColorExport; + + for (unsigned idx = 0; idx < 2; idx++) { + auto infoIt = llvm::find_if(finalExpInfo, [&](const ColorExportInfo &info) { return info.location == idx; }); + if (infoIt != finalExpInfo.end()) { + auto dualExpFmt = static_cast(key.dualExpFmt[idx]); + const unsigned channelWriteMask = key.channelWriteMask[0]; + bool needExpInst = + (dualExpFmt != EXP_FORMAT_ZERO) && (channelWriteMask > 0 || key.colorExportState.alphaToCoverageEnable); + if (needExpInst) { + // Collect info for dualSourceBlend and save then in m_blendSources, so set the last parameter=true; + handleColorExportInstructions(values[infoIt->hwColorTarget], idx, builder, dualExpFmt, infoIt->isSigned, + channelWriteMask, true); + finalExportFormats.push_back(dualExpFmt); + } } } - lastExport = dualSourceSwizzle(builder); + + lastExport = 
dualSourceSwizzle(key.waveSize, builder); FragColorExport::setDoneFlag(lastExport, builder); builder.CreateRetVoid(); } // Construct ".normalExport" Block - if (m_pipelineState->getColorExportState().dualSourceBlendDynamicEnable || - !m_pipelineState->getColorExportState().dualSourceBlendEnable || (gfxip < 11)) { + if (key.colorExportState.dualSourceBlendDynamicEnable || !key.colorExportState.dualSourceBlendEnable || + (gfxip < 11)) { builder.SetInsertPoint(normalExportBlock); unsigned hwColorExport = 0; for (unsigned location = 0; location < MaxColorTargets; ++location) { @@ -1052,10 +1032,10 @@ void FragColorExport::generateExportInstructions(ArrayRef info, if (infoIt == finalExpInfo.end()) continue; assert(infoIt->hwColorTarget < MaxColorTargets); - const unsigned channelWriteMask = m_pipelineState->getColorExportFormat(location).channelWriteMask; - auto expFmt = static_cast(m_pipelineState->computeExportFormat(infoIt->ty, location)); - bool needExpInst = (expFmt != EXP_FORMAT_ZERO) && - (channelWriteMask > 0 || m_pipelineState->getColorExportState().alphaToCoverageEnable); + const unsigned channelWriteMask = key.channelWriteMask[location]; + auto expFmt = static_cast(key.expFmt[location]); + bool needExpInst = + (expFmt != EXP_FORMAT_ZERO) && (channelWriteMask > 0 || key.colorExportState.alphaToCoverageEnable); if (needExpInst) { // Don't collect info for dualSourceBlend just do normal color export, so set the last parameter=false; lastExport = handleColorExportInstructions(values[infoIt->hwColorTarget], hwColorExport, builder, expFmt, @@ -1147,3 +1127,48 @@ void FragColorExport::generateNullFragmentShaderBody(llvm::Function *entryPoint) BuilderBase builder(block); builder.CreateRetVoid(); } + +// ===================================================================================================================== +// Compute color export info. +// +// @param info : The color export information for each color export in no particular order. +// @param pipelineState : Pipeline state +// @returns : Color export info. 
+FragColorExport::Key FragColorExport::computeKey(ArrayRef infos, PipelineState *pipelineState) { + FragColorExport::Key key = {}; + key.enableFragColor = pipelineState->getOptions().enableFragColor; + key.colorExportState = pipelineState->getColorExportState(); + key.waveSize = pipelineState->getShaderWaveSize(ShaderStage::Fragment); + + if (!infos.empty() && infos[0].hwColorTarget == MaxColorTargets) { + infos = infos.drop_front(1); + } + + if (key.enableFragColor) { + auto &expInfo = infos[0]; + assert(expInfo.ty != nullptr); + for (unsigned location = 0; location < MaxColorTargets; ++location) { + if (pipelineState->getColorExportFormat(location).dfmt != BufDataFormatInvalid) { + key.expFmt[location] = pipelineState->computeExportFormat(expInfo.ty, location); + key.channelWriteMask[location] = pipelineState->getColorExportFormat(location).channelWriteMask; + } + } + } else { + for (auto &info : infos) { + key.expFmt[info.location] = pipelineState->computeExportFormat(info.ty, info.location); + key.channelWriteMask[info.location] = pipelineState->getColorExportFormat(info.location).channelWriteMask; + } + } + + if ((pipelineState->getTargetInfo().getGfxIpVersion().major >= 11) && + (key.colorExportState.dualSourceBlendEnable || key.colorExportState.dualSourceBlendDynamicEnable)) { + auto info0It = llvm::find_if(infos, [&](const ColorExportInfo &info) { return info.location == 0; }); + assert(info0It != infos.end()); + key.dualExpFmt[0] = pipelineState->computeExportFormat(info0It->ty, 0, true); + auto info1It = llvm::find_if(infos, [&](const ColorExportInfo &info) { return info.location == 1; }); + if (info1It != infos.end()) + key.dualExpFmt[1] = pipelineState->computeExportFormat(info1It->ty, 1, true); + } + + return key; +} diff --git a/lgc/patch/LowerCooperativeMatrix.cpp b/lgc/patch/LowerCooperativeMatrix.cpp index d8c23464da..8f0954e340 100644 --- a/lgc/patch/LowerCooperativeMatrix.cpp +++ b/lgc/patch/LowerCooperativeMatrix.cpp @@ -31,9 +31,11 @@ #include "lgc/patch/LowerCooperativeMatrix.h" #include "lgc/Builder.h" #include "lgc/LgcContext.h" +#include "lgc/LgcDialect.h" #include "lgc/state/IntrinsDefs.h" #include "lgc/state/PipelineShaders.h" #include "lgc/state/PipelineState.h" +#include "llvm-dialects/Dialect/Visitor.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/Module.h" @@ -56,22 +58,6 @@ PreservedAnalyses LowerCooperativeMatrix::run(Module &module, ModuleAnalysisMana PipelineState *pipelineState = analysisManager.getResult(module).getPipelineState(); PipelineShadersResult &pipelineShaders = analysisManager.getResult(module); - if (runImpl(module, pipelineShaders, pipelineState)) { - PreservedAnalyses PA; - PA.preserveSet(); - return PA; - } - return PreservedAnalyses::all(); -} - -// ===================================================================================================================== -// Run the on a module -// -// @param [in/out] module : LLVM module to be run on -// @param pipelineState : Pipeline state -// @returns : True if the module was modified by the transformation and false otherwise -bool LowerCooperativeMatrix::runImpl(Module &module, PipelineShadersResult &pipelineShaders, - PipelineState *pipelineState) { LLVM_DEBUG(dbgs() << "Run the pass Patch-Cooperative-Matrix\n"); Patch::init(&module); m_pipelineState = pipelineState; @@ -79,6 +65,8 @@ bool LowerCooperativeMatrix::runImpl(Module &module, PipelineShadersResult &pipe m_shaderStage = ShaderStage::Compute; m_gfxIp = 
m_pipelineState->getTargetInfo().getGfxIpVersion(); + processCoopRowAccFunction(module); + SmallVector lowerCoopMatrixCallees; for (auto &func : module) { auto name = func.getName(); @@ -86,7 +74,7 @@ bool LowerCooperativeMatrix::runImpl(Module &module, PipelineShadersResult &pipe lowerCoopMatrixCallees.push_back(&func); } if (lowerCoopMatrixCallees.empty()) - return false; + return PreservedAnalyses::all(); processCoopMatrixFunction(lowerCoopMatrixCallees); @@ -95,7 +83,10 @@ bool LowerCooperativeMatrix::runImpl(Module &module, PipelineShadersResult &pipe callInst->eraseFromParent(); } m_coopMatrixCalls.clear(); - return true; + + PreservedAnalyses PA; + PA.preserveSet(); + return PA; } // ===================================================================================================================== @@ -2004,4 +1995,235 @@ Value *LowerCooperativeMatrix::getLaneNumber(BuilderBase &builder) { return result; } +// ===================================================================================================================== +// Visit "CooperativeRowAccLoadOp" instruction +// +// @param inst : The dialect instruction to process +void LowerCooperativeMatrix::visitCooperativeRowAccLoadOp(CooperativeRowAccLoadOp &load) { + BuilderBase builder(*m_context); + builder.SetInsertPoint(&load); + + auto dataPtr = load.getPointer(); + auto stride = load.getStride(); + auto elemType = static_cast(load.getElemType()); + auto memoryAccess = load.getMemoryAccess(); + + assert(builder.transCooperativeMatrixElementType(elemType) == load.getType()); + + // Calc element offset in memory + Type *elemTy = builder.transCooperativeMatrixElementType(elemType); + const unsigned dataBitwidth = elemTy->getScalarSizeInBits(); + const unsigned addrSpace = dataPtr->getType()->getPointerAddressSpace(); + assert(addrSpace == ADDR_SPACE_LOCAL || addrSpace == ADDR_SPACE_BUFFER_FAT_POINTER || addrSpace == ADDR_SPACE_GLOBAL); + + stride = builder.CreateExactSDiv(stride, builder.getInt32(dataBitwidth / 8)); + + // calc memoryAccess + bool isVolatile = memoryAccess & Builder::MemoryAccessVolatileMask; + bool isCoherent = memoryAccess & Builder::MemoryAccessCoherentMask; + bool isTemporal = memoryAccess & Builder::MemoryAccessTemporalMask; + + Value *threadId = getLaneNumber(builder); + Value *colOffsetPerLane = builder.CreateSRem(threadId, builder.getInt32(16)); + Value *offset = builder.CreateMul(colOffsetPerLane, stride); + + Value *elemPtr = builder.CreateGEP(elemTy, dataPtr, offset); + Value *elemVal = builder.CreateLoad(elemTy, elemPtr, isVolatile); + if (isCoherent && !(addrSpace == ADDR_SPACE_LOCAL && dataBitwidth < 32)) + cast(elemVal)->setAtomic(AtomicOrdering::Unordered); + if (isTemporal) + cast(elemVal)->setMetadata(LLVMContext::MD_nontemporal, MDNode::get(builder.getContext(), {})); + + m_coopRowAccCalls.push_back(&load); + load.replaceAllUsesWith(elemVal); +} + +// ===================================================================================================================== +// Visit "CooperativeRowAccStoreOp" instruction +// +// @param inst : The dialect instruction to process +void LowerCooperativeMatrix::visitCooperativeRowAccStoreOp(CooperativeRowAccStoreOp &store) { + BuilderBase builder(*m_context); + builder.SetInsertPoint(&store); + + auto dataPtr = store.getPointer(); + auto stride = store.getStride(); + auto elemType = static_cast(store.getElemType()); + auto memoryAccess = store.getMemoryAccess(); + auto val = store.getValue(); + + 
assert(builder.transCooperativeMatrixElementType(elemType) == val->getType()); + + // Calc element offset in memory + Type *elemTy = builder.transCooperativeMatrixElementType(elemType); + const unsigned dataBitwidth = elemTy->getScalarSizeInBits(); + const unsigned addrSpace = dataPtr->getType()->getPointerAddressSpace(); + assert(addrSpace == ADDR_SPACE_LOCAL || addrSpace == ADDR_SPACE_BUFFER_FAT_POINTER || addrSpace == ADDR_SPACE_GLOBAL); + + stride = builder.CreateExactSDiv(stride, builder.getInt32(dataBitwidth / 8)); + + // calc memoryAccess + bool isVolatile = memoryAccess & Builder::MemoryAccessVolatileMask; + bool isCoherent = memoryAccess & Builder::MemoryAccessCoherentMask; + bool isTemporal = memoryAccess & Builder::MemoryAccessTemporalMask; + + Value *threadId = getLaneNumber(builder); + Value *colOffsetPerLane = builder.CreateSRem(threadId, builder.getInt32(16)); + Value *offset = builder.CreateMul(colOffsetPerLane, stride); + + Value *elemPtr = builder.CreateGEP(elemTy, dataPtr, offset); + Value *elemVal = builder.CreateStore(val, elemPtr, isVolatile); + if (isCoherent && !(addrSpace == ADDR_SPACE_LOCAL && dataBitwidth < 32)) + cast(elemVal)->setAtomic(AtomicOrdering::Unordered); + if (isTemporal) + cast(elemVal)->setMetadata(LLVMContext::MD_nontemporal, MDNode::get(builder.getContext(), {})); + + m_coopRowAccCalls.push_back(&store); +} + +// ===================================================================================================================== +// Visit "CooperativeRowAccAccumulateModeOp" instruction +// +// @param inst : The dialect instruction to process +void LowerCooperativeMatrix::visitCooperativeRowAccAccumulateModeOp(CooperativeRowAccAccumulateModeOp &accumulateMode) { + BuilderBase builder(*m_context); + builder.SetInsertPoint(&accumulateMode); + + Value *rowAccValue = accumulateMode.getRowAccValue(); + auto elemType = static_cast(accumulateMode.getElemType()); + + assert(builder.transCooperativeMatrixElementType(elemType) == accumulateMode.getType()); + assert(accumulateMode.getType() == rowAccValue->getType()); + + if (m_gfxIp.major >= 12) { + rowAccValue = cooperativeRowAccConvertToAccumulateMode(builder, getLaneNumber(builder), rowAccValue, elemType); + } + + accumulateMode.replaceAllUsesWith(rowAccValue); + m_coopRowAccCalls.push_back(&accumulateMode); +} + +// ===================================================================================================================== +// Visit "CooperativeRowAccFinalizeModeOp" instruction +// +// @param inst : The dialect instruction to process +void LowerCooperativeMatrix::visitCooperativeRowAccFinalizeModeOp(CooperativeRowAccFinalizeModeOp &finalize) { + BuilderBase builder(*m_context); + builder.SetInsertPoint(&finalize); + + Value *rowAccValue = finalize.getRowAccValue(); + auto elemType = static_cast(finalize.getElemType()); + + assert(builder.transCooperativeMatrixElementType(elemType) == finalize.getType()); + assert(finalize.getType() == rowAccValue->getType()); + + if (m_gfxIp.major >= 12) + rowAccValue = cooperativeRowAccConvertToFinalizeMode(builder, rowAccValue, elemType); + + finalize.replaceAllUsesWith(rowAccValue); + m_coopRowAccCalls.push_back(&finalize); +} + +// ===================================================================================================================== +// Convert row acc to finalize mode by adding the interleave 16 lanes. 
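+// Each lane adds the value held by the corresponding lane in the other 16-lane half of its 32-lane group
+// (exchanged via permlanex16), so both halves end up holding the same summed result.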
+// +// @param builder : The IR builder to create and insert IR instruction +// @param rowAccVal : The cooperative rowAcc value +// @param elemType : The component type of the rowAcc value +Value *LowerCooperativeMatrix::cooperativeRowAccConvertToFinalizeMode(BuilderBase &builder, llvm::Value *rowAccVal, + Builder::CooperativeMatrixElementType elemType) { + unsigned LaneSelBits[2] = {0x76543210, 0xfedcba98}; + auto mapFuncX16 = [](BuilderBase &builder, ArrayRef mappedArgs, + ArrayRef passthroughArgs) -> Value * { + Type *const int32Ty = builder.getInt32Ty(); + + return builder.CreateIntrinsic( + int32Ty, Intrinsic::amdgcn_permlanex16, + {mappedArgs[0], mappedArgs[1], passthroughArgs[0], passthroughArgs[1], passthroughArgs[2], passthroughArgs[3]}); + }; + + Value *swapped = builder.CreateMapToSimpleType( + mapFuncX16, + { + rowAccVal, + rowAccVal, + }, + {builder.getInt32(LaneSelBits[0]), builder.getInt32(LaneSelBits[1]), builder.getFalse(), builder.getFalse()}); + + switch (elemType) { + case Builder::CooperativeMatrixElementType::Float32: + case Builder::CooperativeMatrixElementType::Float16: + rowAccVal = builder.CreateFAdd(rowAccVal, swapped); + break; + case Builder::CooperativeMatrixElementType::Int32: + rowAccVal = builder.CreateAdd(rowAccVal, swapped); + break; + case Builder::CooperativeMatrixElementType::Int8: + case Builder::CooperativeMatrixElementType::Int16: + case Builder::CooperativeMatrixElementType::Float16Packed: + llvm_unreachable("not supported element type for row acc"); + default: + llvm_unreachable("unknown element type"); + } + + return rowAccVal; +} + +// ===================================================================================================================== +// Convert row acc to accumulate mode by force set zero on the duplicated lanes in each 32 waves. 
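+// Lanes in the odd 16-lane groups are forced to zero while the even groups keep their value, so the duplicated
+// copies are not accumulated twice when the halves are summed again at finalize time.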
+// +// @param builder : The IR builder to create and insert IR instruction +// @param rowAccVal : The cooperative rowAcc value +// @param threadId : The current lane index +// @param elemType : The component type of the rowAcc value +Value * +LowerCooperativeMatrix::cooperativeRowAccConvertToAccumulateMode(BuilderBase &builder, llvm::Value *rowAccVal, + llvm::Value *threadId, + Builder::CooperativeMatrixElementType elemType) { + Value *zero = nullptr; + switch (elemType) { + case Builder::CooperativeMatrixElementType::Float32: + zero = builder.getFpConstant(builder.getFloatTy(), APFloat(0.0)); + break; + case Builder::CooperativeMatrixElementType::Float16: + zero = builder.getFpConstant(builder.getHalfTy(), APFloat(0.0)); + break; + case Builder::CooperativeMatrixElementType::Int32: + zero = builder.getInt32(0); + break; + case Builder::CooperativeMatrixElementType::Int8: + case Builder::CooperativeMatrixElementType::Int16: + case Builder::CooperativeMatrixElementType::Float16Packed: + llvm_unreachable("not supported element type for cooperative row acc"); + default: + llvm_unreachable("unknown element type"); + } + + Value *laneGroupIdx = builder.CreateUDiv(threadId, builder.getInt32(16)); + Value *isEvenGroup = builder.CreateICmpEQ(builder.CreateAnd(laneGroupIdx, builder.getInt32(1)), builder.getInt32(0)); + return builder.CreateSelect(isEvenGroup, rowAccVal, zero); +} + +// ===================================================================================================================== +// Process all the cooperative row acc operations on module +// +// @param [in/out] module : LLVM module to be run on +void LowerCooperativeMatrix::processCoopRowAccFunction(Module &module) { + static auto visitor = llvm_dialects::VisitorBuilder() + .setStrategy(llvm_dialects::VisitorStrategy::ByFunctionDeclaration) + .add(&LowerCooperativeMatrix::visitCooperativeRowAccLoadOp) + .add(&LowerCooperativeMatrix::visitCooperativeRowAccStoreOp) + .add(&LowerCooperativeMatrix::visitCooperativeRowAccAccumulateModeOp) + .add(&LowerCooperativeMatrix::visitCooperativeRowAccFinalizeModeOp) + .build(); + + visitor.visit(*this, module); + + for (auto callInst : m_coopRowAccCalls) { + callInst->dropAllReferences(); + callInst->eraseFromParent(); + } + m_coopRowAccCalls.clear(); +} + } // namespace lgc diff --git a/lgc/patch/LowerGpuRt.cpp b/lgc/patch/LowerGpuRt.cpp index b551128b6c..abcc8d2733 100644 --- a/lgc/patch/LowerGpuRt.cpp +++ b/lgc/patch/LowerGpuRt.cpp @@ -76,6 +76,7 @@ PreservedAnalyses LowerGpuRt::run(Module &module, ModuleAnalysisManager &analysi .add(&LowerGpuRt::visitGetTriangleCompressionMode) .add(&LowerGpuRt::visitGetFlattenedGroupThreadId) .add(&LowerGpuRt::visitFloatWithRoundMode) + .add(&LowerGpuRt::visitGpurtDispatchThreadIdFlatOp) .build(); visitor.visit(*this, module); @@ -412,4 +413,35 @@ void LowerGpuRt::visitGetFlattenedGroupThreadId(GpurtGetFlattenedGroupThreadIdOp m_funcsToLower.insert(inst.getCalledFunction()); } +// ===================================================================================================================== +// Visit "GpurtDispatchThreadIdFlatOp" instruction +// +// @param inst : The dialect instruction to process +void LowerGpuRt::visitGpurtDispatchThreadIdFlatOp(GpurtDispatchThreadIdFlatOp &inst) { + m_builder->SetInsertPoint(&inst); + auto stage = getShaderStage(m_builder->GetInsertBlock()->getParent()); + Value *flatDispatchId = nullptr; + if (stage == ShaderStage::Compute) { + auto numGroup = m_builder->CreateReadBuiltInInput(lgc::BuiltInNumWorkgroups); + 
auto groupSize = m_builder->CreateReadBuiltInInput(lgc::BuiltInWorkgroupSize); + auto dispatchSize = m_builder->CreateMul(numGroup, groupSize); + auto sizeX = m_builder->CreateExtractElement(dispatchSize, uint64_t(0)); + auto sizeY = m_builder->CreateExtractElement(dispatchSize, 1); + auto sizeXY = m_builder->CreateMul(sizeX, sizeY); + auto dispatchId = m_builder->CreateReadBuiltInInput(lgc::BuiltInGlobalInvocationId); + auto dispatchX = m_builder->CreateExtractElement(dispatchId, uint64_t(0)); + auto dispatchY = m_builder->CreateExtractElement(dispatchId, 1); + auto dispatchZ = m_builder->CreateExtractElement(dispatchId, 2); + flatDispatchId = m_builder->CreateMul(dispatchZ, sizeXY); + flatDispatchId = m_builder->CreateAdd(flatDispatchId, m_builder->CreateMul(dispatchY, sizeX)); + flatDispatchId = m_builder->CreateAdd(flatDispatchId, dispatchX); + } else { + flatDispatchId = getThreadIdInGroup(); + } + + inst.replaceAllUsesWith(flatDispatchId); + m_callsToLower.push_back(&inst); + m_funcsToLower.insert(inst.getCalledFunction()); +} + } // namespace lgc diff --git a/lgc/patch/LowerSubgroupOps.cpp b/lgc/patch/LowerSubgroupOps.cpp new file mode 100644 index 0000000000..f22a74aff4 --- /dev/null +++ b/lgc/patch/LowerSubgroupOps.cpp @@ -0,0 +1,149 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ +/** + *********************************************************************************************************************** + * @file LowerSubgroupOps.cpp + * @brief The lgc::LowerSubgroupOps pass lowers subgroup operations represented as dialect ops to LLVM IR + *********************************************************************************************************************** + */ + +#include "lgc/patch/LowerSubgroupOps.h" +#include "ShaderMerger.h" +#include "lgc/LgcContext.h" +#include "lgc/LgcDialect.h" +#include "lgc/builder/BuilderImpl.h" +#include "lgc/patch/ShaderInputs.h" +#include "lgc/state/AbiMetadata.h" +#include "lgc/state/AbiUnlinked.h" +#include "lgc/state/IntrinsDefs.h" +#include "lgc/state/PalMetadata.h" +#include "lgc/state/PipelineShaders.h" +#include "lgc/state/PipelineState.h" +#include "lgc/state/TargetInfo.h" +#include "lgc/util/AddressExtender.h" +#include "lgc/util/BuilderBase.h" +#include "llvm-dialects/Dialect/Visitor.h" +#include "llvm/Analysis/AliasAnalysis.h" // for MemoryEffects +#include "llvm/IR/IntrinsicsAMDGPU.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include + +#define DEBUG_TYPE "lgc-lower-subgroup-ops" + +using namespace llvm; +using namespace lgc; + +namespace lgc { + +class SubgroupLoweringBuilder : public BuilderImpl { +public: + SubgroupLoweringBuilder(Pipeline *pipeline) : BuilderImpl(pipeline) {} + + // ===================================================================================================================== + // Create a subgroup elect. + // + // @param instName : Name to give instruction(s) + llvm::Value *CreateSubgroupElect(const llvm::Twine &instName = ""); + + // Create a subgroup any + // + // @param value : The value to compare + // @param instName : Name to give instruction(s) + llvm::Value *CreateSubgroupAny(llvm::Value *const value, const llvm::Twine &instName = ""); +}; + +// ===================================================================================================================== +// Create a subgroup elect call. +// +// @param instName : Name to give final instruction. +Value *SubgroupLoweringBuilder::CreateSubgroupElect(const Twine &instName) { + return CreateICmpEQ(CreateSubgroupMbcnt(createGroupBallot(getTrue()), ""), getInt32(0)); +} + +// ===================================================================================================================== +// Create a subgroup any call. +// +// @param value : The value to compare across the subgroup. Must be an integer type. +// @param instName : Name to give final instruction. 
+Value *SubgroupLoweringBuilder::CreateSubgroupAny(Value *const value, const Twine &instName) { + Value *result = CreateICmpNE(createGroupBallot(value), getInt64(0)); + result = CreateSelect(CreateUnaryIntrinsic(Intrinsic::is_constant, value), value, result); + + // Helper invocations of whole quad mode should be included in the subgroup vote execution + const auto &fragmentMode = m_pipelineState->getShaderModes()->getFragmentShaderMode(); + if (m_shaderStage == ShaderStage::Fragment && !fragmentMode.waveOpsExcludeHelperLanes) { + result = CreateZExt(result, getInt32Ty()); + result = CreateIntrinsic(Intrinsic::amdgcn_softwqm, {getInt32Ty()}, {result}); + result = CreateTrunc(result, getInt1Ty()); + } + return result; +} + +// ===================================================================================================================== +// Executes this LLVM patching pass on the specified LLVM module. +// +// @param [in/out] module : LLVM module to be run on +// @param [in/out] analysisManager : Analysis manager to use for this transformation +// @returns : The preserved analyses (The analyses that are still valid after this pass) +PreservedAnalyses LowerSubgroupOps::run(Module &module, ModuleAnalysisManager &analysisManager) { + PipelineState *pipelineState = analysisManager.getResult(module).getPipelineState(); + // PipelineShadersResult &pipelineShaders = analysisManager.getResult(module); + LLVM_DEBUG(dbgs() << "Run the pass lower subgroup ops\n"); + + m_pipelineState = pipelineState; + + SubgroupLoweringBuilder builder(m_pipelineState); + m_builder = &builder; + static const auto visitor = llvm_dialects::VisitorBuilder() + .setStrategy(llvm_dialects::VisitorStrategy::ByFunctionDeclaration) + .add(&LowerSubgroupOps::visitElect) + .add(&LowerSubgroupOps::visitAny) + .build(); + visitor.visit(*this, module); + m_builder = nullptr; + + return PreservedAnalyses::none(); +} + +void LowerSubgroupOps::replace(CallInst &old, Value *op) { + old.replaceAllUsesWith(op); + old.dropAllReferences(); + old.eraseFromParent(); +} + +void LowerSubgroupOps::visitElect(SubgroupElectOp &op) { + m_builder->SetInsertPoint(&op); + replace(op, m_builder->CreateSubgroupElect()); +} + +void LowerSubgroupOps::visitAny(SubgroupAnyOp &op) { + m_builder->SetInsertPoint(&op); + replace(op, m_builder->CreateSubgroupAny(op.getValue())); +} + +} // namespace lgc diff --git a/lgc/patch/NggPrimShader.cpp b/lgc/patch/NggPrimShader.cpp index 29763d5c3b..e67492df1c 100644 --- a/lgc/patch/NggPrimShader.cpp +++ b/lgc/patch/NggPrimShader.cpp @@ -795,20 +795,6 @@ FunctionType *NggPrimShader::getPrimShaderType(uint64_t &inRegMask) { llvm_unreachable("Not implemented!"); } - // If the ES is the API VS, and it is a fetchless VS, then we need to add args for the vertex fetches. - if (!m_hasTes) { - unsigned vertexFetchCount = m_pipelineState->getPalMetadata()->getVertexFetchCount(); - if (vertexFetchCount != 0) { - // TODO: This will not work with non-GS culling. - if (!m_hasGs && !m_nggControl->passthroughMode) - m_pipelineState->setError("Fetchless VS in NGG culling mode (without API GS) not supported"); - // The final vertexFetchCount args of the ES (API VS) are the vertex fetches. 
- unsigned esArgSize = m_esHandlers.main->arg_size(); - for (unsigned idx = esArgSize - vertexFetchCount; idx != esArgSize; ++idx) - argTys.push_back(m_esHandlers.main->getArg(idx)->getType()); - } - } - return FunctionType::get(m_builder.getVoidTy(), argTys, false); } @@ -3139,20 +3125,6 @@ void NggPrimShader::runEs(ArrayRef args) { esArgs.push_back(relVertexId); esArgs.push_back(vsPrimitiveId); esArgs.push_back(instanceId); - - // When tessellation is not enabled, the ES is actually a fetchless VS. Then, we need to add arguments for the - // vertex fetches. Also set the name of each vertex fetch primitive shader argument while we're here. - unsigned vertexFetchCount = m_pipelineState->getPalMetadata()->getVertexFetchCount(); - if (vertexFetchCount > 0) { - ArrayRef vertexFetches = vgprArgs.drop_front(m_gfxIp.major <= 11 ? 9 : 5); - assert(vertexFetches.size() == vertexFetchCount); - - for (unsigned i = 0; i < vertexFetchCount; ++i) { - vertexFetches[i]->setName(m_esHandlers.main->getArg(m_esHandlers.main->arg_size() - vertexFetchCount + i) - ->getName()); // Copy argument name - esArgs.push_back(vertexFetches[i]); - } - } } assert(esArgs.size() == m_esHandlers.main->arg_size()); // Must have visit all arguments of ES entry point @@ -7522,23 +7494,6 @@ Value *NggPrimShader::fetchXfbOutput(Function *target, ArrayRef args xfbFetcherArgs.push_back(relVertexId); xfbFetcherArgs.push_back(vsPrimitiveId); xfbFetcherArgs.push_back(instanceId); - - if (m_nggControl->passthroughMode) { - // When tessellation is not enabled, the transform feedback fetch function is actually a fetchless VS. Then, we - // need to add arguments for the vertex fetches. Also set the name of each vertex fetch primitive shader argument - // while we're here. - unsigned vertexFetchCount = m_pipelineState->getPalMetadata()->getVertexFetchCount(); - if (vertexFetchCount > 0) { - ArrayRef vertexFetches = vgprArgs.drop_front(m_gfxIp.major <= 11 ? 
9 : 5); - assert(vertexFetches.size() == vertexFetchCount); - - for (unsigned i = 0; i < vertexFetchCount; ++i) { - vertexFetches[i]->setName( - xfbFetcher->getArg(xfbFetcher->arg_size() - vertexFetchCount + i)->getName()); // Copy argument name - xfbFetcherArgs.push_back(vertexFetches[i]); - } - } - } } assert(xfbFetcherArgs.size() == xfbFetcher->arg_size()); // Must have visit all arguments diff --git a/lgc/patch/PassRegistry.inc b/lgc/patch/PassRegistry.inc index 165f02e918..f3750af730 100644 --- a/lgc/patch/PassRegistry.inc +++ b/lgc/patch/PassRegistry.inc @@ -66,6 +66,7 @@ LLPC_MODULE_PASS("lgc-patch-prepare-pipeline-abi", PatchPreparePipelineAbi) LLPC_FUNCTION_PASS("lgc-patch-read-first-lane", PatchReadFirstLane) LLPC_MODULE_PASS("lgc-patch-llvm-ir-inclusion", PatchLlvmIrInclusion) LLPC_FUNCTION_PASS("lgc-patch-peephole-opt", PatchPeepholeOpt) +LLPC_MODULE_PASS("lgc-lower-subgroup-ops", LowerSubgroupOps) LLPC_MODULE_PASS("lgc-patch-entry-point-mutate", PatchEntryPointMutate) LLPC_MODULE_PASS("lgc-patch-check-shader-cache", PatchCheckShaderCache) LLPC_LOOP_PASS("lgc-patch-loop-metadata", PatchLoopMetadata) diff --git a/lgc/patch/Patch.cpp b/lgc/patch/Patch.cpp index ba78672338..e626b4364a 100644 --- a/lgc/patch/Patch.cpp +++ b/lgc/patch/Patch.cpp @@ -42,6 +42,7 @@ #include "lgc/patch/LowerDebugPrintf.h" #include "lgc/patch/LowerDesc.h" #include "lgc/patch/LowerGpuRt.h" +#include "lgc/patch/LowerSubgroupOps.h" #include "lgc/patch/PatchBufferOp.h" #include "lgc/patch/PatchCheckShaderCache.h" #include "lgc/patch/PatchCopyShader.h" @@ -144,6 +145,7 @@ void Patch::addPasses(PipelineState *pipelineState, lgc::PassManager &passMgr, T // We're using BuilderRecorder; replay the Builder calls now passMgr.addPass(BuilderReplayer()); + passMgr.addPass(LowerSubgroupOps()); if (raw_ostream *outs = getLgcOuts()) { passMgr.addPass(PrintModulePass(*outs, diff --git a/lgc/patch/PatchCheckShaderCache.cpp b/lgc/patch/PatchCheckShaderCache.cpp index 17ca81ae71..bf450bbe53 100644 --- a/lgc/patch/PatchCheckShaderCache.cpp +++ b/lgc/patch/PatchCheckShaderCache.cpp @@ -69,23 +69,12 @@ PatchCheckShaderCache::PatchCheckShaderCache(Pipeline::CheckShaderCacheFunc call // @returns : The preserved analyses (The analyses that are still valid after this pass) PreservedAnalyses PatchCheckShaderCache::run(Module &module, ModuleAnalysisManager &analysisManager) { PipelineState *pipelineState = analysisManager.getResult(module).getPipelineState(); - if (runImpl(module, pipelineState)) - return PreservedAnalyses::none(); - return PreservedAnalyses::all(); -} -// ===================================================================================================================== -// Executes this LLVM patching pass on the specified LLVM module. -// -// @param [in/out] module : LLVM module to be run on -// @param [in/out] pipelineState : Pipeline state object to use for this pass -// @returns : True if the module was modified by the transformation and false otherwise -bool PatchCheckShaderCache::runImpl(Module &module, PipelineState *pipelineState) { LLVM_DEBUG(dbgs() << "Run the pass Patch-Check-Shader-Cache\n"); if (m_callbackFunc == nullptr) { // No shader cache in use. - return false; + return PreservedAnalyses::all(); } Patch::init(&module); @@ -136,7 +125,7 @@ bool PatchCheckShaderCache::runImpl(Module &module, PipelineState *pipelineState // Ask callback function if it wants to remove any shader stages. 
auto stagesLeftToCompile = m_callbackFunc(&module, stageMask, inOutUsageValues); if (stagesLeftToCompile == stageMask) - return false; + return PreservedAnalyses::all(); // "Remove" a shader stage by making its entry-point function an external but not DLLExport declaration, so further // passes no longer treat it as an entry point (based on the DLL storage class) and don't attempt to compile any code @@ -150,5 +139,5 @@ bool PatchCheckShaderCache::runImpl(Module &module, PipelineState *pipelineState } } } - return true; + return PreservedAnalyses::none(); } diff --git a/lgc/patch/PatchCopyShader.cpp b/lgc/patch/PatchCopyShader.cpp index a57cd6cb7e..524141b467 100644 --- a/lgc/patch/PatchCopyShader.cpp +++ b/lgc/patch/PatchCopyShader.cpp @@ -57,19 +57,7 @@ using namespace llvm; PreservedAnalyses PatchCopyShader::run(Module &module, ModuleAnalysisManager &analysisManager) { PipelineState *pipelineState = analysisManager.getResult(module).getPipelineState(); PipelineShadersResult &pipelineShaders = analysisManager.getResult(module); - if (runImpl(module, pipelineShaders, pipelineState)) - return PreservedAnalyses::none(); - return PreservedAnalyses::all(); -} -// ===================================================================================================================== -// Run the pass on the specified LLVM module. -// -// @param [in/out] module : LLVM module to be run on -// @param pipelineShaders : Pipeline shaders analysis result -// @param pipelineState : Pipeline state -// @returns : True if the module was modified by the transformation and false otherwise -bool PatchCopyShader::runImpl(Module &module, PipelineShadersResult &pipelineShaders, PipelineState *pipelineState) { LLVM_DEBUG(dbgs() << "Run the pass Patch-Copy-Shader\n"); Patch::init(&module); @@ -80,7 +68,7 @@ bool PatchCopyShader::runImpl(Module &module, PipelineShadersResult &pipelineSha auto gsEntryPoint = pipelineShaders.getEntryPoint(ShaderStage::Geometry); if (!gsEntryPoint) { // Skip copy shader generation if GS is absent - return false; + return PreservedAnalyses::all(); } // Tell pipeline state there is a copy shader. @@ -297,7 +285,7 @@ bool PatchCopyShader::runImpl(Module &module, PipelineShadersResult &pipelineSha builder.CreateBr(endBlock); } - return true; + return PreservedAnalyses::none(); } // ===================================================================================================================== diff --git a/lgc/patch/PatchEntryPointMutate.cpp b/lgc/patch/PatchEntryPointMutate.cpp index 4f8179e804..fc677a67ea 100644 --- a/lgc/patch/PatchEntryPointMutate.cpp +++ b/lgc/patch/PatchEntryPointMutate.cpp @@ -113,19 +113,7 @@ PatchEntryPointMutate::UserDataArg::UserDataArg(llvm::Type *argTy, const llvm::T PreservedAnalyses PatchEntryPointMutate::run(Module &module, ModuleAnalysisManager &analysisManager) { PipelineState *pipelineState = analysisManager.getResult(module).getPipelineState(); PipelineShadersResult &pipelineShaders = analysisManager.getResult(module); - runImpl(module, pipelineShaders, pipelineState); - return PreservedAnalyses::none(); -} -// ===================================================================================================================== -// Executes this LLVM patching pass on the specified LLVM module. 
-// -// @param [in/out] module : LLVM module to be run on -// @param pipelineShaders : Pipeline shaders analysis result -// @param pipelineState : Pipeline state -// @returns : True if the module was modified by the transformation and false otherwise -bool PatchEntryPointMutate::runImpl(Module &module, PipelineShadersResult &pipelineShaders, - PipelineState *pipelineState) { LLVM_DEBUG(dbgs() << "Run the pass Patch-Entry-Point-Mutate\n"); Patch::init(&module); @@ -172,7 +160,7 @@ bool PatchEntryPointMutate::runImpl(Module &module, PipelineShadersResult &pipel m_cpsShaderInputCache.clear(); processGroupMemcpy(module); - return true; + return PreservedAnalyses::none(); } // ===================================================================================================================== @@ -1725,19 +1713,15 @@ void PatchEntryPointMutate::addSpecialUserDataArgs(SmallVectorImpl auto vsIntfData = m_pipelineState->getShaderInterfaceData(ShaderStage::Vertex); auto vsResUsage = m_pipelineState->getShaderResourceUsage(ShaderStage::Vertex); - // Detect whether this is an unlinked compile that will need a fetch shader. If so, we need to - // add the vertex buffer table and base vertex and base instance, even if they appear unused here. - bool willHaveFetchShader = m_pipelineState->getPalMetadata()->getVertexFetchCount() != 0; - // Vertex buffer table. - if (willHaveFetchShader || userDataUsage->isSpecialUserDataUsed(UserDataMapping::VertexBufferTable)) { + if (userDataUsage->isSpecialUserDataUsed(UserDataMapping::VertexBufferTable)) { specialUserDataArgs.push_back(UserDataArg(builder.getInt32Ty(), "vertexBufferTable", UserDataMapping::VertexBufferTable, &vsIntfData->entryArgIdxs.vs.vbTablePtr)); } // Base vertex and base instance. - if (willHaveFetchShader || vsResUsage->builtInUsage.vs.baseVertex || vsResUsage->builtInUsage.vs.baseInstance || + if (vsResUsage->builtInUsage.vs.baseVertex || vsResUsage->builtInUsage.vs.baseInstance || userDataUsage->isSpecialUserDataUsed(UserDataMapping::BaseVertex) || userDataUsage->isSpecialUserDataUsed(UserDataMapping::BaseInstance)) { specialUserDataArgs.push_back(UserDataArg(builder.getInt32Ty(), "baseVertex", UserDataMapping::BaseVertex, diff --git a/lgc/patch/PatchImageDerivatives.cpp b/lgc/patch/PatchImageDerivatives.cpp index 8d760d4167..e7798d7acd 100644 --- a/lgc/patch/PatchImageDerivatives.cpp +++ b/lgc/patch/PatchImageDerivatives.cpp @@ -44,19 +44,6 @@ using namespace lgc; namespace lgc { -// ===================================================================================================================== -// Executes this LLVM patching pass on the specified LLVM module. 
-// -// @param [in/out] module : LLVM module to be run on -// @param [in/out] analysisManager : Analysis manager to use for this transformation -// @returns : The preserved analyses (The analyses that are still valid after this pass) -PreservedAnalyses PatchImageDerivatives::run(Module &module, ModuleAnalysisManager &analysisManager) { - PipelineState *pipelineState = analysisManager.getResult(module).getPipelineState(); - if (runImpl(module, pipelineState)) - return PreservedAnalyses::all(); // Note: this patching never invalidates analysis data - return PreservedAnalyses::all(); -} - static bool usesImplicitDerivatives(StringRef name) { if (!(name.starts_with("llvm.amdgcn.image.sample") || name.starts_with("llvm.amdgcn.image.gather"))) return false; @@ -69,16 +56,18 @@ static bool usesImplicitDerivatives(StringRef name) { // Executes this LLVM patching pass on the specified LLVM module. // // @param [in/out] module : LLVM module to be run on -// @param pipelineState : Pipeline state -// @returns : True if the module was modified by the transformation and false otherwise -bool PatchImageDerivatives::runImpl(llvm::Module &module, PipelineState *pipelineState) { +// @param [in/out] analysisManager : Analysis manager to use for this transformation +// @returns : The preserved analyses (The analyses that are still valid after this pass) +PreservedAnalyses PatchImageDerivatives::run(Module &module, ModuleAnalysisManager &analysisManager) { + PipelineState *pipelineState = analysisManager.getResult(module).getPipelineState(); + LLVM_DEBUG(dbgs() << "Run the pass Patch-Image-Derivatives\n"); if (!pipelineState->hasShaderStage(ShaderStage::Fragment)) - return false; + return PreservedAnalyses::all(); ResourceUsage *resUsage = pipelineState->getShaderResourceUsage(ShaderStage::Fragment); if (!resUsage->builtInUsage.fs.discard) - return false; + return PreservedAnalyses::all(); SmallSet killBlocks; DenseSet derivativeBlocks; @@ -108,7 +97,7 @@ bool PatchImageDerivatives::runImpl(llvm::Module &module, PipelineState *pipelin // Note: in theory killBlocks should not be empty here, but it is cheap to check. if (killBlocks.empty() || derivativeBlocks.empty()) - return false; + return PreservedAnalyses::all(); DenseSet visitedBlocks; SmallVector roots; @@ -152,14 +141,14 @@ bool PatchImageDerivatives::runImpl(llvm::Module &module, PipelineState *pipelin LLVM_DEBUG(dbgs() << "Detected implicit derivatives used after kill.\n"); Function *fsFunc = testBlock->getParent(); fsFunc->addFnAttr("amdgpu-transform-discard-to-demote"); - return true; + return PreservedAnalyses::all(); // we don't actually invalidate anything } append_range(worklist, successors(testBlock)); } // No paths from kills to derivatives exist. 
- return false; + return PreservedAnalyses::all(); } } // namespace lgc diff --git a/lgc/patch/PatchImageOpCollect.cpp b/lgc/patch/PatchImageOpCollect.cpp index 4042938af2..dd316cbb88 100644 --- a/lgc/patch/PatchImageOpCollect.cpp +++ b/lgc/patch/PatchImageOpCollect.cpp @@ -49,18 +49,7 @@ namespace lgc { // @returns : The preserved analyses (The analyses that are still valid after this pass) PreservedAnalyses PatchImageOpCollect::run(Module &module, ModuleAnalysisManager &analysisManager) { PipelineState *pipelineState = analysisManager.getResult(module).getPipelineState(); - if (runImpl(module, pipelineState)) - return PreservedAnalyses::none(); - return PreservedAnalyses::all(); -} -// ===================================================================================================================== -// Executes this LLVM patching pass on the specified LLVM module. -// -// @param [in/out] module : LLVM module to be run on -// @param pipelineState : Pipeline state -// @returns : True if the module was modified by the transformation and false otherwise -bool PatchImageOpCollect::runImpl(llvm::Module &module, PipelineState *pipelineState) { LLVM_DEBUG(dbgs() << "Run the pass Patch-Image-Op-Collect\n"); for (Function &func : module) { @@ -75,7 +64,7 @@ bool PatchImageOpCollect::runImpl(llvm::Module &module, PipelineState *pipelineS } } } - return false; + return PreservedAnalyses::all(); // we don't actually invalidate anything } } // namespace lgc diff --git a/lgc/patch/PatchInOutImportExport.cpp b/lgc/patch/PatchInOutImportExport.cpp index 8baf314d15..5afbe2b3a2 100644 --- a/lgc/patch/PatchInOutImportExport.cpp +++ b/lgc/patch/PatchInOutImportExport.cpp @@ -84,26 +84,11 @@ void PatchInOutImportExport::initPerShader() { PreservedAnalyses PatchInOutImportExport::run(Module &module, ModuleAnalysisManager &analysisManager) { PipelineState *pipelineState = analysisManager.getResult(module).getPipelineState(); PipelineShadersResult &pipelineShaders = analysisManager.getResult(module); - auto getPDT = [&](Function &f) -> PostDominatorTree & { + auto getPostDominatorTree = [&](Function &f) -> PostDominatorTree & { auto &fam = analysisManager.getResult(module).getManager(); return fam.getResult(f); }; - if (runImpl(module, pipelineShaders, pipelineState, getPDT)) - return PreservedAnalyses::none(); - return PreservedAnalyses::all(); -} -// ===================================================================================================================== -// Executes this LLVM patching pass on the specified LLVM module. 
-// -// @param [in/out] module : LLVM module to be run on -// @param pipelineShaders : Pipeline shaders analysis result -// @param pipelineState : Pipeline state -// @param getPostDominatorTree : Function to get the PostDominatorTree of the given Function object -// @returns : True if the module was modified by the transformation and false otherwise -bool PatchInOutImportExport::runImpl(Module &module, PipelineShadersResult &pipelineShaders, - PipelineState *pipelineState, - const std::function &getPostDominatorTree) { LLVM_DEBUG(dbgs() << "Run the pass Patch-In-Out-Import-Export\n"); Patch::init(&module); @@ -131,12 +116,6 @@ bool PatchInOutImportExport::runImpl(Module &module, PipelineShadersResult &pipe m_lds = Patch::getLdsVariable(m_pipelineState, m_module); // Set buffer formats based on specific GFX - static const std::array BufferFormatsGfx9 = { - BUF_NUM_FORMAT_FLOAT << 4 | BUF_DATA_FORMAT_32, - BUF_NUM_FORMAT_FLOAT << 4 | BUF_DATA_FORMAT_32_32, - BUF_NUM_FORMAT_FLOAT << 4 | BUF_DATA_FORMAT_32_32_32, - BUF_NUM_FORMAT_FLOAT << 4 | BUF_DATA_FORMAT_32_32_32_32, - }; static const std::array BufferFormatsGfx10 = { BUF_FORMAT_32_FLOAT, BUF_FORMAT_32_32_FLOAT_GFX10, @@ -151,15 +130,15 @@ bool PatchInOutImportExport::runImpl(Module &module, PipelineShadersResult &pipe }; switch (m_gfxIp.major) { - default: - m_buffFormats = &BufferFormatsGfx9; - break; case 10: m_buffFormats = &BufferFormatsGfx10; break; case 11: m_buffFormats = &BufferFormatsGfx11; break; + default: + llvm_unreachable("unsupported GFX IP"); + break; } // Process each shader in turn, in reverse order (because for example VS uses inOutUsage.tcs.calcFactor @@ -196,7 +175,7 @@ bool PatchInOutImportExport::runImpl(Module &module, PipelineShadersResult &pipe m_pipelineSysValues.clear(); - return true; + return PreservedAnalyses::none(); } void PatchInOutImportExport::processFunction( @@ -3744,7 +3723,7 @@ void PatchInOutImportExport::storeValueToStreamOutBuffer(Value *storeValue, unsi } // NOTE: SW XFB must have been handled. Here we only handle HW XFB on pre-GFX11 generations. 
- assert(m_gfxIp.major < 11); + assert(m_gfxIp.major == 10); auto storeTy = storeValue->getType(); @@ -3823,32 +3802,13 @@ void PatchInOutImportExport::storeValueToStreamOutBuffer(Value *storeValue, unsi // writeIndex += threadId writeIndex = builder.CreateAdd(writeIndex, m_threadId); - unsigned format = 0; - switch (m_gfxIp.major) { - default: { - CombineFormat combineFormat = {}; - combineFormat.bits.nfmt = BUF_NUM_FORMAT_FLOAT; - static const unsigned char dfmtTable[4][2] = { - {BUF_DATA_FORMAT_16, BUF_DATA_FORMAT_32}, - {BUF_DATA_FORMAT_16_16, BUF_DATA_FORMAT_32_32}, - {BUF_DATA_FORMAT_INVALID, BUF_DATA_FORMAT_32_32_32}, - {BUF_DATA_FORMAT_16_16_16_16, BUF_DATA_FORMAT_32_32_32_32}, - }; - combineFormat.bits.dfmt = dfmtTable[compCount - 1][bitWidth == 32]; - format = combineFormat.u32All; - break; - } - case 10: { - static unsigned char formatTable[4][2] = { - {BUF_FORMAT_16_FLOAT, BUF_FORMAT_32_FLOAT}, - {BUF_FORMAT_16_16_FLOAT, BUF_FORMAT_32_32_FLOAT_GFX10}, - {BUF_FORMAT_INVALID, BUF_FORMAT_32_32_32_FLOAT_GFX10}, - {BUF_FORMAT_16_16_16_16_FLOAT_GFX10, BUF_FORMAT_32_32_32_32_FLOAT_GFX10}, - }; - format = formatTable[compCount - 1][bitWidth == 32]; - break; - } - } + static unsigned char formatTable[4][2] = { + {BUF_FORMAT_16_FLOAT, BUF_FORMAT_32_FLOAT}, + {BUF_FORMAT_16_16_FLOAT, BUF_FORMAT_32_32_FLOAT_GFX10}, + {BUF_FORMAT_INVALID, BUF_FORMAT_32_32_32_FLOAT_GFX10}, + {BUF_FORMAT_16_16_16_16_FLOAT_GFX10, BUF_FORMAT_32_32_32_32_FLOAT_GFX10}, + }; + unsigned format = formatTable[compCount - 1][bitWidth == 32]; CoherentFlag coherent = {}; coherent.bits.glc = true; diff --git a/lgc/patch/PatchInitializeWorkgroupMemory.cpp b/lgc/patch/PatchInitializeWorkgroupMemory.cpp index ae044528bf..7e60b158d3 100644 --- a/lgc/patch/PatchInitializeWorkgroupMemory.cpp +++ b/lgc/patch/PatchInitializeWorkgroupMemory.cpp @@ -57,26 +57,13 @@ namespace lgc { PreservedAnalyses PatchInitializeWorkgroupMemory::run(Module &module, ModuleAnalysisManager &analysisManager) { PipelineState *pipelineState = analysisManager.getResult(module).getPipelineState(); PipelineShadersResult &pipelineShaders = analysisManager.getResult(module); - if (runImpl(module, pipelineShaders, pipelineState)) - return PreservedAnalyses::none(); - return PreservedAnalyses::all(); -} -// ===================================================================================================================== -// Executes this LLVM patching pass on the specified LLVM module. -// -// @param [in/out] module : LLVM module to be run on -// @param pipelineShaders : Pipeline shaders analysis result -// @param pipelineState : Pipeline state -// @returns : True if the module was modified by the transformation and false otherwise -bool PatchInitializeWorkgroupMemory::runImpl(Module &module, PipelineShadersResult &pipelineShaders, - PipelineState *pipelineState) { LLVM_DEBUG(dbgs() << "Run the pass Patch-Initialize-Workgroup-Memory\n"); m_pipelineState = pipelineState; // This pass works on compute shader. 
if (!m_pipelineState->hasShaderStage(ShaderStage::Compute)) - return false; + return PreservedAnalyses::all(); SmallVector workgroupGlobals; for (GlobalVariable &global : module.globals()) { @@ -88,7 +75,7 @@ bool PatchInitializeWorkgroupMemory::runImpl(Module &module, PipelineShadersResu } if (workgroupGlobals.empty()) - return false; + return PreservedAnalyses::all(); Patch::init(&module); m_shaderStage = ShaderStage::Compute; @@ -125,7 +112,7 @@ bool PatchInitializeWorkgroupMemory::runImpl(Module &module, PipelineShadersResu initializeWithZero(lds, builder); - return true; + return PreservedAnalyses::none(); } // ===================================================================================================================== diff --git a/lgc/patch/PatchInvariantLoads.cpp b/lgc/patch/PatchInvariantLoads.cpp index a29af844ef..d05ba4ebff 100644 --- a/lgc/patch/PatchInvariantLoads.cpp +++ b/lgc/patch/PatchInvariantLoads.cpp @@ -44,21 +44,6 @@ using namespace lgc; namespace lgc { -// ===================================================================================================================== -// Executes this LLVM pass on the specified LLVM function. -// -// @param [in/out] function : Function that we will patch. -// @param [in/out] analysisManager : Analysis manager to use for this transformation -// @returns : The preserved analyses (The analyses that are still valid after this pass) -PreservedAnalyses PatchInvariantLoads::run(Function &function, FunctionAnalysisManager &analysisManager) { - const auto &moduleAnalysisManager = analysisManager.getResult(function); - PipelineState *pipelineState = - moduleAnalysisManager.getCachedResult(*function.getParent())->getPipelineState(); - if (runImpl(function, pipelineState)) - return PreservedAnalyses::none(); - return PreservedAnalyses::all(); -} - static const unsigned UNKNOWN_ADDRESS_SPACE = ADDR_SPACE_MAX + 1; enum AddrSpaceBit { @@ -96,14 +81,18 @@ static unsigned findAddressSpaceAccess(const Instruction *inst) { // Executes this LLVM pass on the specified LLVM function. // // @param [in/out] function : Function that we will patch. -// @param [in/out] pipelineState : Pipeline state object to use for this pass -// @returns : True if the function was modified by the transformation and false otherwise -bool PatchInvariantLoads::runImpl(Function &function, PipelineState *pipelineState) { +// @param [in/out] analysisManager : Analysis manager to use for this transformation +// @returns : The preserved analyses (The analyses that are still valid after this pass) +PreservedAnalyses PatchInvariantLoads::run(Function &function, FunctionAnalysisManager &analysisManager) { + const auto &moduleAnalysisManager = analysisManager.getResult(function); + PipelineState *pipelineState = + moduleAnalysisManager.getCachedResult(*function.getParent())->getPipelineState(); + LLVM_DEBUG(dbgs() << "Run the pass Patch-Invariant-Loads\n"); auto shaderStage = lgc::getShaderStage(&function); if (!shaderStage) - return false; + return PreservedAnalyses::all(); auto &options = pipelineState->getShaderOptions(shaderStage.value()); bool clearInvariants = options.aggressiveInvariantLoads == ClearInvariants; @@ -124,7 +113,7 @@ bool PatchInvariantLoads::runImpl(Function &function, PipelineState *pipelineSta } if (!(clearInvariants || aggressiveInvariants)) - return false; + return PreservedAnalyses::all(); LLVM_DEBUG(dbgs() << (clearInvariants ? 
"Removing invariant load flags" : "Attempting aggressive invariant load optimization") @@ -185,7 +174,7 @@ bool PatchInvariantLoads::runImpl(Function &function, PipelineState *pipelineSta unsigned addrSpace = findAddressSpaceAccess(&inst); if (addrSpace == UNKNOWN_ADDRESS_SPACE) { LLVM_DEBUG(dbgs() << "Write to unknown memory found, aborting aggressive invariant load optimization\n"); - return false; + return PreservedAnalyses::all(); } writtenAddrSpaces |= aliasMatrix[addrSpace]; } else if (inst.mayReadFromMemory()) { @@ -196,11 +185,11 @@ bool PatchInvariantLoads::runImpl(Function &function, PipelineState *pipelineSta if (loads.empty()) { LLVM_DEBUG(dbgs() << "Shader has no memory loads\n"); - return false; + return PreservedAnalyses::all(); } + bool changed = false; if (clearInvariants) { - bool changed = false; for (Instruction *inst : loads) { if (!inst->hasMetadata(LLVMContext::MD_invariant_load)) continue; @@ -209,23 +198,21 @@ bool PatchInvariantLoads::runImpl(Function &function, PipelineState *pipelineSta inst->setMetadata(LLVMContext::MD_invariant_load, nullptr); changed = true; } - return changed; - } + } else { + auto &context = function.getContext(); + for (Instruction *inst : loads) { + if (inst->hasMetadata(LLVMContext::MD_invariant_load)) + continue; + if (writtenAddrSpaces && (writtenAddrSpaces & (1 << findAddressSpaceAccess(inst)))) + continue; - auto &context = function.getContext(); - bool changed = false; - for (Instruction *inst : loads) { - if (inst->hasMetadata(LLVMContext::MD_invariant_load)) - continue; - if (writtenAddrSpaces && (writtenAddrSpaces & (1 << findAddressSpaceAccess(inst)))) - continue; - - LLVM_DEBUG(dbgs() << "Marking load invariant: " << *inst << "\n"); - inst->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(context, {})); - changed = true; + LLVM_DEBUG(dbgs() << "Marking load invariant: " << *inst << "\n"); + inst->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(context, {})); + changed = true; + } } - return changed; + return changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); } } // namespace lgc diff --git a/lgc/patch/PatchLlvmIrInclusion.cpp b/lgc/patch/PatchLlvmIrInclusion.cpp index 07a98ed3e6..3ff1c7f387 100644 --- a/lgc/patch/PatchLlvmIrInclusion.cpp +++ b/lgc/patch/PatchLlvmIrInclusion.cpp @@ -48,20 +48,9 @@ namespace lgc { // @returns : The preserved analyses (The analyses that are still valid after this pass) PreservedAnalyses PatchLlvmIrInclusion::run(Module &module, ModuleAnalysisManager &analysisManager) { PipelineState *pipelineState = analysisManager.getResult(module).getPipelineState(); - if (pipelineState->getOptions().includeIr) - runImpl(module); - return PreservedAnalyses::none(); -} + if (!pipelineState->getOptions().includeIr) + return PreservedAnalyses::all(); -// ===================================================================================================================== -// Executes this patching pass on the specified LLVM module. -// -// This pass includes LLVM IR as a separate section in the ELF binary by inserting a new global variable with explicit -// section. 
-// -// @param [in/out] module : LLVM module to be run on -// @returns : True if the module was modified by the transformation and false otherwise -bool PatchLlvmIrInclusion::runImpl(Module &module) { Patch::init(&module); std::string moduleStr; @@ -78,7 +67,7 @@ bool PatchLlvmIrInclusion::runImpl(Module &module) { std::string namePrefix = Util::Abi::AmdGpuCommentName; global->setSection(namePrefix + "llvmir"); - return true; + return PreservedAnalyses::none(); } } // namespace lgc diff --git a/lgc/patch/PatchLoadScalarizer.cpp b/lgc/patch/PatchLoadScalarizer.cpp index 90eb5afea2..a551bff048 100644 --- a/lgc/patch/PatchLoadScalarizer.cpp +++ b/lgc/patch/PatchLoadScalarizer.cpp @@ -58,18 +58,7 @@ PreservedAnalyses PatchLoadScalarizer::run(Function &function, FunctionAnalysisM const auto &moduleAnalysisManager = analysisManager.getResult(function); PipelineState *pipelineState = moduleAnalysisManager.getCachedResult(*function.getParent())->getPipelineState(); - if (runImpl(function, pipelineState)) - return PreservedAnalyses::none(); - return PreservedAnalyses::all(); -} -// ===================================================================================================================== -// Executes this LLVM pass on the specified LLVM function. -// -// @param [in/out] function : Function that will run this optimization. -// @param [in/out] pipelineState : Pipeline state object to use for this pass -// @returns : True if the module was modified by the transformation and false otherwise -bool PatchLoadScalarizer::runImpl(Function &function, PipelineState *pipelineState) { LLVM_DEBUG(dbgs() << "Run the pass Patch-Load-Scalarizer-Opt\n"); auto shaderStage = lgc::getShaderStage(&function); @@ -79,7 +68,7 @@ bool PatchLoadScalarizer::runImpl(Function &function, PipelineState *pipelineSta if (shaderStage) m_scalarThreshold = pipelineState->getShaderOptions(shaderStage.value()).loadScalarizerThreshold; if (m_scalarThreshold == 0) - return false; + return PreservedAnalyses::all(); m_builder = std::make_unique>(function.getContext()); @@ -93,7 +82,7 @@ bool PatchLoadScalarizer::runImpl(Function &function, PipelineState *pipelineSta } m_instsToErase.clear(); - return changed; + return changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); } // ===================================================================================================================== diff --git a/lgc/patch/PatchLoopMetadata.cpp b/lgc/patch/PatchLoopMetadata.cpp index d1ce7c9eda..e6af095cb8 100644 --- a/lgc/patch/PatchLoopMetadata.cpp +++ b/lgc/patch/PatchLoopMetadata.cpp @@ -101,18 +101,7 @@ PreservedAnalyses PatchLoopMetadata::run(Loop &loop, LoopAnalysisManager &analys Module *module = loop.getHeader()->getModule(); const auto &mamProxy = analysisManager.getResult(loop, loopAnalysisResults); PipelineState *pipelineState = mamProxy.getCachedResult(*module)->getPipelineState(); - if (runImpl(loop, pipelineState)) - return PreservedAnalyses::none(); - return PreservedAnalyses::all(); -} -// ===================================================================================================================== -// Executes this LLVM patching pass on the specified LLVM module. 
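// [Editorial note, not part of the patch] For the function and loop passes converted in this change
// (PatchInvariantLoads, PatchLoadScalarizer, PatchLoopMetadata), run() now reaches the module-level
// pipeline state through the analysis-manager proxies instead of taking it as a runImpl() parameter.
// A rough sketch of that lookup for a function pass, assuming the module analysis is
// PipelineStateWrapper as elsewhere in lgc and that its result is already cached:
//
//   llvm::PreservedAnalyses run(llvm::Function &function, llvm::FunctionAnalysisManager &analysisManager) {
//     const auto &proxy = analysisManager.getResult<llvm::ModuleAnalysisManagerFunctionProxy>(function);
//     PipelineState *pipelineState =
//         proxy.getCachedResult<PipelineStateWrapper>(*function.getParent())->getPipelineState();
//     // ... use pipelineState to decide what to rewrite in 'function' ...
//     return llvm::PreservedAnalyses::all();
//   }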
-// -// @param [in/out] loop : LLVM loop to be run on -// @param pipelineState : Pipeline state -// @returns : True if the loop was modified by the transformation and false otherwise -bool PatchLoopMetadata::runImpl(Loop &loop, PipelineState *pipelineState) { LLVM_DEBUG(dbgs() << "Run the pass lgc-patch-loop-metadata\n"); Function *func = loop.getHeader()->getFirstNonPHI()->getFunction(); @@ -124,7 +113,7 @@ bool PatchLoopMetadata::runImpl(Loop &loop, PipelineState *pipelineState) { auto stage = getShaderStage(func); if (!stage) - return false; + return PreservedAnalyses::all(); if (auto shaderOptions = &mPipelineState->getShaderOptions(stage.value())) { m_disableLoopUnroll = shaderOptions->disableLoopUnroll; m_forceLoopUnrollCount = shaderOptions->forceLoopUnrollCount; @@ -135,7 +124,7 @@ bool PatchLoopMetadata::runImpl(Loop &loop, PipelineState *pipelineState) { MDNode *loopMetaNode = loop.getLoopID(); if (!loopMetaNode || loopMetaNode->getOperand(0) != loopMetaNode) - return false; + return PreservedAnalyses::all(); LLVM_DEBUG(dbgs() << "loop in " << func->getName() << " at depth " << loop.getLoopDepth() << " has " << loop.getNumBlocks() << " blocks\n"); @@ -209,5 +198,5 @@ bool PatchLoopMetadata::runImpl(Loop &loop, PipelineState *pipelineState) { loop.setLoopID(loopMetaNode); } - return changed; + return changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); } diff --git a/lgc/patch/PatchNullFragShader.cpp b/lgc/patch/PatchNullFragShader.cpp index b58c7b299d..7347611f8d 100644 --- a/lgc/patch/PatchNullFragShader.cpp +++ b/lgc/patch/PatchNullFragShader.cpp @@ -54,33 +54,23 @@ using namespace llvm; // @returns : The preserved analyses (The analyses that are still valid after this pass) PreservedAnalyses PatchNullFragShader::run(Module &module, ModuleAnalysisManager &analysisManager) { PipelineState *pipelineState = analysisManager.getResult(module).getPipelineState(); - if (runImpl(module, pipelineState)) - return PreservedAnalyses::none(); - return PreservedAnalyses::all(); -} -// ===================================================================================================================== -// Run the pass on the specified LLVM module. -// -// @param [in/out] module : LLVM module to be run on -// @returns : True if the module was modified by the transformation and false otherwise -bool PatchNullFragShader::runImpl(Module &module, PipelineState *pipelineState) { LLVM_DEBUG(dbgs() << "Run the pass Patch-Null-Frag-Shader\n"); Patch::init(&module); // Do not add a null fragment shader if not generating a whole pipeline. if (!pipelineState->isWholePipeline()) - return false; + return PreservedAnalyses::all(); // If a fragment shader is not needed, then do not generate one. 
const bool hasFs = pipelineState->hasShaderStage(ShaderStage::Fragment); if (hasFs || !pipelineState->isGraphics()) - return false; + return PreservedAnalyses::all(); FragColorExport::generateNullFragmentShader(module, pipelineState, lgcName::NullFsEntryPoint); updatePipelineState(pipelineState); - return true; + return PreservedAnalyses::none(); } // ===================================================================================================================== diff --git a/lgc/patch/PatchNullFragShader.h b/lgc/patch/PatchNullFragShader.h index 4ce77e77c2..a9e0f70066 100644 --- a/lgc/patch/PatchNullFragShader.h +++ b/lgc/patch/PatchNullFragShader.h @@ -41,8 +41,6 @@ class PatchNullFragShader : public Patch, public llvm::PassInfoMixingetPalMetadata()->finalizePipeline(m_pipelineState->isWholePipeline()); - return true; // Modified the module. + return PreservedAnalyses::none(); } // ===================================================================================================================== @@ -422,35 +408,11 @@ void PatchPreparePipelineAbi::mergeShader(Module &module) { // // @param module : LLVM module void PatchPreparePipelineAbi::setAbiEntryNames(Module &module) { - bool hasTs = m_hasTcs || m_hasTes; - bool isFetchless = m_pipelineState->getPalMetadata()->getVertexFetchCount() != 0; for (auto &func : module) { if (!func.empty()) { auto callingConv = func.getCallingConv(); - bool isFetchlessVs = false; - if (isFetchless) { - switch (callingConv) { - case CallingConv::AMDGPU_VS: - isFetchlessVs = !m_hasGs && !hasTs; - break; - case CallingConv::AMDGPU_GS: - isFetchlessVs = m_gfxIp.major >= 9 && !hasTs; - break; - case CallingConv::AMDGPU_ES: - isFetchlessVs = !hasTs; - break; - case CallingConv::AMDGPU_HS: - isFetchlessVs = m_gfxIp.major >= 9; - break; - case CallingConv::AMDGPU_LS: - isFetchlessVs = true; - break; - default: - break; - } - } - StringRef entryName = getEntryPointName(callingConv, isFetchlessVs); + StringRef entryName = getEntryPointName(callingConv, false); if (entryName != "") func.setName(entryName); } diff --git a/lgc/patch/PatchResourceCollect.cpp b/lgc/patch/PatchResourceCollect.cpp index 6880cf1b5b..85a67be8fc 100644 --- a/lgc/patch/PatchResourceCollect.cpp +++ b/lgc/patch/PatchResourceCollect.cpp @@ -72,19 +72,7 @@ PatchResourceCollect::PatchResourceCollect() : m_resUsage(nullptr) { PreservedAnalyses PatchResourceCollect::run(Module &module, ModuleAnalysisManager &analysisManager) { PipelineShadersResult &pipelineShaders = analysisManager.getResult(module); PipelineState *pipelineState = analysisManager.getResult(module).getPipelineState(); - runImpl(module, pipelineShaders, pipelineState); - return PreservedAnalyses::none(); -} -// ===================================================================================================================== -// Executes this LLVM patching pass on the specified LLVM module. 
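// [Editorial note, not part of the patch] The checkRayQueryLdsStackUsage() hunk further below no
// longer assumes every user of the LDS stack global is an instruction; it walks a small worklist so
// that uses reached through constant expressions (for example constant GEPs) are visited as well.
// A standalone sketch of that traversal, with a hypothetical helper name:
//
//   static void forEachInstructionUser(llvm::Constant *root,
//                                      llvm::function_ref<void(llvm::Instruction *)> visit) {
//     llvm::SmallVector<llvm::Constant *, 8> worklist;
//     worklist.push_back(root);
//     do {
//       llvm::Constant *current = worklist.pop_back_val();
//       for (llvm::User *user : current->users()) {
//         if (auto *constUser = llvm::dyn_cast<llvm::ConstantExpr>(user))
//           worklist.push_back(constUser); // keep looking through constant expressions
//         else if (auto *inst = llvm::dyn_cast<llvm::Instruction>(user))
//           visit(inst); // reached an actual instruction user
//       }
//     } while (!worklist.empty());
//   }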
-// -// @param [in/out] module : LLVM module to be run on -// @param pipelineShaders : Pipeline shaders analysis result -// @param pipelineState : Pipeline state -// @returns : True if the module was modified by the transformation and false otherwise -bool PatchResourceCollect::runImpl(Module &module, PipelineShadersResult &pipelineShaders, - PipelineState *pipelineState) { LLVM_DEBUG(dbgs() << "Run the pass Patch-Resource-Collect\n"); Patch::init(&module); @@ -153,7 +141,7 @@ bool PatchResourceCollect::runImpl(Module &module, PipelineShadersResult &pipeli } } - return true; + return PreservedAnalyses::none(); } // ===================================================================================================================== @@ -162,11 +150,7 @@ bool PatchResourceCollect::runImpl(Module &module, PipelineShadersResult &pipeli // @param [in/out] module : Module void PatchResourceCollect::setNggControl(Module *module) { assert(m_pipelineState->isGraphics()); - - // For GFX10+, initialize NGG control settings - if (m_pipelineState->getTargetInfo().getGfxIpVersion().major < 10) - return; - + assert(m_pipelineState->getTargetInfo().getGfxIpVersion().major >= 10); // If mesh pipeline, skip NGG control settings const bool meshPipeline = m_pipelineState->hasShaderStage(ShaderStage::Task) || m_pipelineState->hasShaderStage(ShaderStage::Mesh); @@ -425,8 +409,7 @@ bool PatchResourceCollect::canUseNggCulling(Module *module) { } return false; }; - bool hasVertexFetch = m_pipelineState->getPalMetadata()->getVertexFetchCount() != 0; - if (!hasGs && !hasVertexFetch && !hasPositionFetch()) + if (!hasGs && !hasPositionFetch()) return false; // We can safely enable NGG culling here @@ -1136,19 +1119,26 @@ bool PatchResourceCollect::isVertexReuseDisabled() { // // @param module : LLVM module void PatchResourceCollect::checkRayQueryLdsStackUsage(Module *module) { - if (m_pipelineState->getTargetInfo().getGfxIpVersion().major < 10) - return; // Must be GFX10+ - + assert(m_pipelineState->getTargetInfo().getGfxIpVersion().major >= 10); auto ldsStack = module->getNamedGlobal(RayQueryLdsStackName); if (ldsStack) { - for (auto user : ldsStack->users()) { - auto inst = cast(user); - assert(inst); + SmallVector worklist; + worklist.push_back(ldsStack); + do { + Constant *current = worklist.pop_back_val(); + for (auto user : current->users()) { + if (auto *constUser = dyn_cast(user)) { + worklist.push_back(constUser); + continue; + } - auto shaderStage = lgc::getShaderStage(inst->getFunction()); - if (shaderStage) - m_pipelineState->getShaderResourceUsage(shaderStage.value())->useRayQueryLdsStack = true; - } + auto inst = cast(user); + + auto shaderStage = lgc::getShaderStage(inst->getFunction()); + if (shaderStage) + m_pipelineState->getShaderResourceUsage(shaderStage.value())->useRayQueryLdsStack = true; + } + } while (!worklist.empty()); } } diff --git a/lgc/patch/PatchSetupTargetFeatures.cpp b/lgc/patch/PatchSetupTargetFeatures.cpp index e72e4ce984..aa7e5c31af 100644 --- a/lgc/patch/PatchSetupTargetFeatures.cpp +++ b/lgc/patch/PatchSetupTargetFeatures.cpp @@ -48,17 +48,7 @@ using namespace lgc; // @returns : The preserved analyses (The analyses that are still valid after this pass) PreservedAnalyses PatchSetupTargetFeatures::run(Module &module, ModuleAnalysisManager &analysisManager) { PipelineState *pipelineState = analysisManager.getResult(module).getPipelineState(); - runImpl(module, pipelineState); - return PreservedAnalyses::none(); -} -// 
===================================================================================================================== -// Run the pass on the specified LLVM module. -// -// @param [in/out] module : LLVM module to be run on -// @param pipelineState : Pipeline state -// @returns : True if the module was modified by the transformation and false otherwise -bool PatchSetupTargetFeatures::runImpl(Module &module, PipelineState *pipelineState) { LLVM_DEBUG(dbgs() << "Run the pass Patch-Setup-Target-Features\n"); Patch::init(&module); @@ -66,7 +56,7 @@ bool PatchSetupTargetFeatures::runImpl(Module &module, PipelineState *pipelineSt m_pipelineState = pipelineState; setupTargetFeatures(&module); - return true; // Modified the module. + return PreservedAnalyses::none(); } // ===================================================================================================================== diff --git a/lgc/patch/PatchWorkarounds.cpp b/lgc/patch/PatchWorkarounds.cpp index b070bf181d..6c9b1fa566 100644 --- a/lgc/patch/PatchWorkarounds.cpp +++ b/lgc/patch/PatchWorkarounds.cpp @@ -54,18 +54,7 @@ namespace lgc { // @returns : The preserved analyses (The analyses that are still valid after this pass) PreservedAnalyses PatchWorkarounds::run(Module &module, ModuleAnalysisManager &analysisManager) { PipelineState *pipelineState = analysisManager.getResult(module).getPipelineState(); - if (runImpl(module, pipelineState)) - return PreservedAnalyses::none(); - return PreservedAnalyses::all(); -} -// ===================================================================================================================== -// Executes this LLVM pass on the specified LLVM function. -// -// @param [in/out] module : Module that we will add workarounds in -// @param pipelineState : Pipeline state -// @returns : True if the module was modified by the transformation and false otherwise -bool PatchWorkarounds::runImpl(Module &module, PipelineState *pipelineState) { LLVM_DEBUG(dbgs() << "Run the pass Patch-Workarounds\n"); Patch::init(&module); @@ -79,7 +68,7 @@ bool PatchWorkarounds::runImpl(Module &module, PipelineState *pipelineState) { // Patch image resource descriptor when app provides wrong type applyImageDescWorkaround(); - return m_changed; + return m_changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); } // ===================================================================================================================== diff --git a/lgc/patch/ShaderInputs.cpp b/lgc/patch/ShaderInputs.cpp index 634788c3b4..126972bfb2 100644 --- a/lgc/patch/ShaderInputs.cpp +++ b/lgc/patch/ShaderInputs.cpp @@ -609,12 +609,6 @@ uint64_t ShaderInputs::getShaderArgTys(PipelineState *pipelineState, ShaderStage getShaderInputUsage(shaderStage, static_cast(ShaderInput::StreamOutOffset0) + i)->enable(); } } - if (pipelineState->getPalMetadata()->getVertexFetchCount() != 0) { - // This is an unlinked compile that will need a fetch shader. We need to add the vertex ID and - // instance ID, even if they appear unused here. 
- getShaderInputUsage(shaderStage, ShaderInput::VertexId)->enable(); - getShaderInputUsage(shaderStage, ShaderInput::InstanceId)->enable(); - } break; case ShaderStage::TessEval: if (!hasGs) { diff --git a/lgc/patch/ShaderMerger.cpp b/lgc/patch/ShaderMerger.cpp index 981c7bb698..2411a9fb57 100644 --- a/lgc/patch/ShaderMerger.cpp +++ b/lgc/patch/ShaderMerger.cpp @@ -283,9 +283,6 @@ FunctionType *ShaderMerger::generateLsHsEntryPointType(uint64_t *inRegMask) cons } argTys.push_back(Type::getInt32Ty(*m_context)); // Instance ID - // Vertex fetch VGPRs - appendVertexFetchTypes(argTys); - return FunctionType::get(Type::getVoidTy(*m_context), argTys, false); } @@ -421,18 +418,6 @@ Function *ShaderMerger::generateLsHsEntryPoint(Function *lsEntryPoint, Function {mergeWaveInfo, builder.getInt32(8), builder.getInt32(8)}); hsVertCount->setName("hsVertCount"); - // NOTE: For GFX9, hardware has an issue of initializing LS VGPRs. When HS is null, v0~v3 are initialized as LS - // VGPRs rather than expected v2~v4. - auto gpuWorkarounds = &m_pipelineState->getTargetInfo().getGpuWorkarounds(); - if (gpuWorkarounds->gfx9.fixLsVgprInput) { - auto nullHs = builder.CreateICmpEQ(hsVertCount, builder.getInt32(0)); - - vertexId = builder.CreateSelect(nullHs, vgprArgs[0], vgprArgs[2]); - relVertexId = builder.CreateSelect(nullHs, vgprArgs[1], vgprArgs[3]); - stepRate = builder.CreateSelect(nullHs, vgprArgs[2], vgprArgs[4]); - instanceId = builder.CreateSelect(nullHs, vgprArgs[3], vgprArgs[5]); - } - auto validLsVert = builder.CreateICmpULT(threadIdInWave, lsVertCount, "validLsVert"); builder.CreateCondBr(validLsVert, beginLsBlock, endLsBlock); @@ -622,9 +607,6 @@ FunctionType *ShaderMerger::generateEsGsEntryPointType(uint64_t *inRegMask) cons argTys.push_back(Type::getInt32Ty(*m_context)); // Relative vertex ID (auto index) argTys.push_back(Type::getInt32Ty(*m_context)); // Primitive ID (VS) argTys.push_back(Type::getInt32Ty(*m_context)); // Instance ID - - // Vertex fetch VGPRs - appendVertexFetchTypes(argTys); } return FunctionType::get(Type::getVoidTy(*m_context), argTys, false); @@ -953,21 +935,6 @@ void ShaderMerger::appendUserData(BuilderBase &builder, SmallVectorImpl } } -// ===================================================================================================================== -// Appends the type for each of the vertex fetches found in the PAL metadata. -// -// @param [in/out] argTys : The vector to which the type will be appended. -void ShaderMerger::appendVertexFetchTypes(std::vector &argTys) const { - if (m_pipelineState->getPalMetadata()->getVertexFetchCount() != 0) { - SmallVector fetches; - m_pipelineState->getPalMetadata()->getVertexFetchInfo(fetches); - m_pipelineState->getPalMetadata()->addVertexFetchInfo(fetches); - for (const auto &fetchInfo : fetches) { - argTys.push_back(getVgprTy(fetchInfo.ty)); - } - } -} - // ===================================================================================================================== // Appends the arguments in the range [begin,end) to the vector. 
// @@ -986,8 +953,7 @@ void ShaderMerger::appendArguments(SmallVectorImpl &args, ArrayRef= 10); Module *module = nullptr; if (entryPoint1) diff --git a/lgc/patch/ShaderMerger.h b/lgc/patch/ShaderMerger.h index 1623708d0a..587816aaf9 100644 --- a/lgc/patch/ShaderMerger.h +++ b/lgc/patch/ShaderMerger.h @@ -112,7 +112,6 @@ class ShaderMerger { void appendUserData(BuilderBase &builder, llvm::SmallVectorImpl &args, llvm::Function *target, unsigned argIdx, llvm::Value *userData, unsigned userDataCount, llvm::ArrayRef> substitutions = {}); - void appendVertexFetchTypes(std::vector &argTys) const; void appendArguments(llvm::SmallVectorImpl &args, llvm::ArrayRef argsToAppend) const; void gatherTuningAttributes(llvm::AttrBuilder &tuningAttrs, const llvm::Function *srcEntryPoint) const; diff --git a/lgc/patch/TcsPassthroughShader.cpp b/lgc/patch/TcsPassthroughShader.cpp index 9b3bc12ae3..684a213f56 100644 --- a/lgc/patch/TcsPassthroughShader.cpp +++ b/lgc/patch/TcsPassthroughShader.cpp @@ -272,15 +272,19 @@ void TcsPassthroughShader::generateTcsPassthroughShaderBody(Module &module, Pipe switch (builtIn) { case BuiltInPointSize: tcsBuiltInInfo.pointSizeIn = true; + tcsBuiltInInfo.pointSize = true; break; case BuiltInPosition: tcsBuiltInInfo.positionIn = true; + tcsBuiltInInfo.position = true; break; case BuiltInClipDistance: tcsBuiltInInfo.clipDistanceIn = arraySize; + tcsBuiltInInfo.clipDistance = arraySize; break; case BuiltInCullDistance: tcsBuiltInInfo.cullDistanceIn = arraySize; + tcsBuiltInInfo.cullDistance = arraySize; break; case BuiltInPatchVertices: tcsBuiltInInfo.patchVertices = true; diff --git a/lgc/patch/VertexFetch.cpp b/lgc/patch/VertexFetch.cpp index df8d6ce2ed..1c6c152758 100644 --- a/lgc/patch/VertexFetch.cpp +++ b/lgc/patch/VertexFetch.cpp @@ -613,18 +613,7 @@ const unsigned char VertexFetchImpl::m_vertexFormatMapGfx11[][9] = { // @returns : The preserved analyses (The analyses that are still valid after this pass) PreservedAnalyses LowerVertexFetch::run(Module &module, ModuleAnalysisManager &analysisManager) { PipelineState *pipelineState = analysisManager.getResult(module).getPipelineState(); - if (runImpl(module, pipelineState)) - return PreservedAnalyses::none(); - return PreservedAnalyses::all(); -} -// ===================================================================================================================== -// Run the lower vertex fetch pass on a module -// -// @param [in/out] module : Module -// @param pipelineState : Pipeline state -// @returns : True if the module was modified by the transformation and false otherwise -bool LowerVertexFetch::runImpl(Module &module, PipelineState *pipelineState) { // Gather vertex fetch calls. We can assume they're all in one function, the vertex shader. // We can assume that multiple fetches of the same location, component and type have been CSEd. 
SmallVector vertexFetches; @@ -637,7 +626,7 @@ bool LowerVertexFetch::runImpl(Module &module, PipelineState *pipelineState) { .build(); fetchVisitor.visit(vertexFetches, module); if (vertexFetches.empty()) - return false; + return PreservedAnalyses::all(); std::unique_ptr vertexFetch(VertexFetch::create( pipelineState->getLgcContext(), pipelineState->getOptions().useSoftwareVertexBufferDescriptors, @@ -666,101 +655,39 @@ bool LowerVertexFetch::runImpl(Module &module, PipelineState *pipelineState) { inst->replaceAllUsesWith(vertex); inst->eraseFromParent(); } - return true; - } - - if (!pipelineState->isUnlinked() || !pipelineState->getVertexInputDescriptions().empty()) { - // Whole-pipeline compilation (or shader compilation where we were given the vertex input descriptions). - // Lower each vertex fetch. - for (InputImportGenericOp *fetch : vertexFetches) { - Value *vertex = nullptr; - - // Find the vertex input description. - unsigned location = fetch->getLocation(); - unsigned component = cast(fetch->getElemIdx())->getZExtValue(); - - assert(!fetch->getPerPrimitive()); - assert(cast(fetch->getLocOffset())->isZero()); - - const VertexInputDescription *description = pipelineState->findVertexInputDescription(location); - - if (!description) { - // If we could not find vertex input info matching this location, just return undefined value. - vertex = PoisonValue::get(fetch->getType()); - } else { - // Fetch the vertex. - builder.SetInsertPoint(fetch); - builder.setShaderStage(ShaderStage::Vertex); - vertex = vertexFetch->fetchVertex(fetch->getType(), description, location, component, builder); - } - - // Replace and erase this call. - fetch->replaceAllUsesWith(vertex); - fetch->eraseFromParent(); - } - - return true; + return PreservedAnalyses::none(); } - // Unlinked shader compilation; the linker will add a fetch shader. Here we need to - // 1. add metadata giving the location, component, type of each vertex fetch; - // 2. add an input arg for each vertex fetch. - // - // First add the metadata and mutate the vertex shader function. - SmallVector info; - SmallVector argTys; - SmallVector argNames; + // Whole-pipeline compilation (or shader compilation where we were given the vertex input descriptions). + // Lower each vertex fetch. for (InputImportGenericOp *fetch : vertexFetches) { + Value *vertex = nullptr; + + // Find the vertex input description. unsigned location = fetch->getLocation(); unsigned component = cast(fetch->getElemIdx())->getZExtValue(); assert(!fetch->getPerPrimitive()); assert(cast(fetch->getLocOffset())->isZero()); - info.push_back({location, component, fetch->getType()}); + const VertexInputDescription *description = pipelineState->findVertexInputDescription(location); - Type *ty = fetch->getType(); - // The return value from the fetch shader needs to use all floats, as the back-end maps an int in the - // return value as an SGPR rather than a VGPR. For symmetry, we also use all floats here, in the input - // args to the fetchless vertex shader. - ty = getVgprTy(ty); - argTys.push_back(ty); - argNames.push_back(""); - } - pipelineState->getPalMetadata()->addVertexFetchInfo(info); - - // Mutate the vertex shader function to add the new args. - Function *newFunc = addFunctionArgs(vertexFetches[0]->getFunction(), nullptr, argTys, argNames); - - // Hook up each vertex fetch to the corresponding arg. 
- for (unsigned idx = 0; idx != vertexFetches.size(); ++idx) { - InputImportGenericOp *fetch = vertexFetches[idx]; - Value *vertex = newFunc->getArg(idx); - if (fetch->getType() != vertex->getType()) { - // We changed to an all-float type above. + if (!description) { + // If we could not find vertex input info matching this location, just return undefined value. + vertex = PoisonValue::get(fetch->getType()); + } else { + // Fetch the vertex. builder.SetInsertPoint(fetch); - Type *elementTy = fetch->getType()->getScalarType(); - unsigned numElements = vertex->getType()->getPrimitiveSizeInBits() / elementTy->getPrimitiveSizeInBits(); - vertex = - builder.CreateBitCast(vertex, numElements == 1 ? elementTy : FixedVectorType::get(elementTy, numElements)); - if (fetch->getType() != vertex->getType()) { - // The types are now vectors of the same element type but different element counts, or fetch->getType() - // is scalar. - if (auto vecTy = dyn_cast(fetch->getType())) { - int indices[] = {0, 1, 2, 3}; - vertex = - builder.CreateShuffleVector(vertex, vertex, ArrayRef(indices).slice(0, vecTy->getNumElements())); - } else { - vertex = builder.CreateExtractElement(vertex, uint64_t(0)); - } - } + builder.setShaderStage(ShaderStage::Vertex); + vertex = vertexFetch->fetchVertex(fetch->getType(), description, location, component, builder); } - vertex->setName("vertex" + Twine(info[idx].location) + "." + Twine(info[idx].component)); + + // Replace and erase this call. fetch->replaceAllUsesWith(vertex); fetch->eraseFromParent(); } - return true; + return PreservedAnalyses::none(); } // ===================================================================================================================== @@ -1433,13 +1360,6 @@ unsigned VertexFetchImpl::mapVertexFormat(unsigned dfmt, unsigned nfmt) const { GfxIpVersion gfxIp = m_lgcContext->getTargetInfo().getGfxIpVersion(); switch (gfxIp.major) { - default: { - CombineFormat formatOprd = {}; - formatOprd.bits.dfmt = dfmt; - formatOprd.bits.nfmt = nfmt; - format = formatOprd.u32All; - break; - } case 10: assert(dfmt < sizeof(m_vertexFormatMapGfx10) / sizeof(m_vertexFormatMapGfx10[0])); assert(nfmt < sizeof(m_vertexFormatMapGfx10[0]) / sizeof(m_vertexFormatMapGfx10[0][0])); @@ -1450,6 +1370,9 @@ unsigned VertexFetchImpl::mapVertexFormat(unsigned dfmt, unsigned nfmt) const { assert(nfmt < sizeof(m_vertexFormatMapGfx11[0]) / sizeof(m_vertexFormatMapGfx11[0][0])); format = m_vertexFormatMapGfx11[dfmt][nfmt]; break; + default: + llvm_unreachable("unsupported GFX IP"); + break; } return format; } diff --git a/lgc/state/PalMetadata.cpp b/lgc/state/PalMetadata.cpp index 0c6a2e0451..8d7cf93794 100644 --- a/lgc/state/PalMetadata.cpp +++ b/lgc/state/PalMetadata.cpp @@ -800,54 +800,6 @@ void PalMetadata::setUserDataLimit() { *m_userDataLimit = userDataLimit; } -// ===================================================================================================================== -// Store the vertex fetch information in PAL metadata for a fetchless vertex shader with shader compilation. -// -// @param info : Array of VertexFetchInfo structs -void PalMetadata::addVertexFetchInfo(ArrayRef fetches) { - // Each vertex fetch is an array containing {location,component,type}. - // .vertexInputs is an array containing the vertex fetches. 
- m_vertexInputs = m_pipelineNode[PipelineMetadataKey::VertexInputs].getArray(true); - for (const VertexFetchInfo &fetch : fetches) { - msgpack::ArrayDocNode fetchNode = m_document->getArrayNode(); - fetchNode.push_back(m_document->getNode(fetch.location)); - fetchNode.push_back(m_document->getNode(fetch.component)); - fetchNode.push_back(m_document->getNode(getTypeName(fetch.ty), /*copy=*/true)); - m_vertexInputs.push_back(fetchNode); - } -} - -// ===================================================================================================================== -// Get the count of vertex fetches for a fetchless vertex shader with shader compilation (or 0 otherwise). -unsigned PalMetadata::getVertexFetchCount() { - if (m_vertexInputs.isEmpty()) - return 0; - return m_vertexInputs.size(); -} - -// ===================================================================================================================== -// Get the vertex fetch information out of PAL metadata. Used by the linker to generate the fetch shader. -// Also removes the vertex fetch information, so it does not appear in the final linked ELF. -// -// @param [out] fetches : Vector to store info of each fetch -void PalMetadata::getVertexFetchInfo(SmallVectorImpl &fetches) { - if (m_vertexInputs.isEmpty()) { - auto it = m_pipelineNode.find(m_document->getNode(PipelineMetadataKey::VertexInputs)); - if (it == m_pipelineNode.end() || !it->second.isArray()) - return; - m_vertexInputs = it->second.getArray(); - } - for (unsigned i = 0, e = m_vertexInputs.size(); i != e; ++i) { - msgpack::ArrayDocNode fetchNode = m_vertexInputs[i].getArray(); - unsigned location = fetchNode[0].getUInt(); - unsigned component = fetchNode[1].getUInt(); - StringRef tyName = fetchNode[2].getString(); - Type *ty = getLlvmType(tyName); - fetches.push_back({location, component, ty}); - } - m_pipelineNode.erase(m_document->getNode(PipelineMetadataKey::VertexInputs)); -} - // ===================================================================================================================== // Store the color export information in PAL metadata for an exportless fragment shader with shader compilation. // @@ -1439,8 +1391,7 @@ unsigned PalMetadata::getVgprCount(unsigned callingConv) { // // @param callingConv : The calling convention of the shader stage bool PalMetadata::isWave32(unsigned callingConv) { - if (m_pipelineState->getTargetInfo().getGfxIpVersion().major < 10) - return false; + assert(m_pipelineState->getTargetInfo().getGfxIpVersion().major >= 10); if (m_useRegisterFieldFormat) { auto vgtShaderStagesEn = m_pipelineNode[Util::Abi::PipelineMetadataKey::GraphicsRegisters] diff --git a/lgc/state/PipelineShaders.cpp b/lgc/state/PipelineShaders.cpp index 0e7368dfb3..c14b41e61e 100644 --- a/lgc/state/PipelineShaders.cpp +++ b/lgc/state/PipelineShaders.cpp @@ -49,24 +49,12 @@ PipelineShadersResult::PipelineShadersResult() { } // ===================================================================================================================== -// Run the pass on the specified LLVM module. +// Run the analysis on the specified LLVM module. 
// // @param [in/out] module : LLVM module to be run on // @param [in/out] analysisManager : Analysis manager to use for this transformation // @returns : Result object of the PipelineShaders pass PipelineShadersResult PipelineShaders::run(Module &module, ModuleAnalysisManager &analysisManager) { - return runImpl(module); -} - -// ===================================================================================================================== -// Run the pass on the specified LLVM module. -// -// This populates the shader array. In the pipeline module, a shader entrypoint is a non-internal function definition, -// and it has metadata giving the SPIR-V execution model. -// -// @param [in/out] module : LLVM module to be run on -// @returns : Result object of the PipelineShaders pass -PipelineShadersResult PipelineShaders::runImpl(Module &module) { LLVM_DEBUG(dbgs() << "Run the pass Pipeline-Shaders\n"); PipelineShadersResult result; diff --git a/lgc/state/PipelineState.cpp b/lgc/state/PipelineState.cpp index c428b90e71..74820c0faa 100644 --- a/lgc/state/PipelineState.cpp +++ b/lgc/state/PipelineState.cpp @@ -1859,7 +1859,6 @@ unsigned PipelineState::getVerticesPerPrimitive() { return 0; } } else if (hasShaderStage(ShaderStage::TessControl) || hasShaderStage(ShaderStage::TessEval)) { - assert(getInputAssemblyState().primitiveType == PrimitiveType::Patch); const auto &tessMode = getShaderModes()->getTessellationMode(); if (tessMode.pointMode) return 1; @@ -1989,19 +1988,8 @@ PipelineStateWrapper::Result PipelineStateWrapper::run(Module &module, ModuleAna // @returns : The preserved analyses (The analyses that are still valid after this pass) PreservedAnalyses PipelineStateClearer::run(Module &module, ModuleAnalysisManager &analysisManager) { PipelineState *pipelineState = analysisManager.getResult(module).getPipelineState(); - runImpl(module, pipelineState); - return PreservedAnalyses::none(); -} - -// ===================================================================================================================== -// Run PipelineStateClearer pass to clear the pipeline state out of the IR -// -// @param [in/out] module : IR module -// @param : PipelineState object to clear -// @returns : True if the module was modified by the transformation and false otherwise -bool PipelineStateClearer::runImpl(Module &module, PipelineState *pipelineState) { pipelineState->clear(&module); - return true; + return PreservedAnalyses::none(); } // ===================================================================================================================== diff --git a/lgc/state/TargetInfo.cpp b/lgc/state/TargetInfo.cpp index d73857ba2f..098b919764 100644 --- a/lgc/state/TargetInfo.cpp +++ b/lgc/state/TargetInfo.cpp @@ -42,10 +42,10 @@ static cl::opt NativeWaveSize("native-wave-size", cl::desc("Overrides hardw // ===================================================================================================================== // Functions to set up TargetInfo for the various targets -// gfx9+ +// gfx10+ // // @param [in/out] targetInfo : Target info -static void setGfx9BaseInfo(TargetInfo *targetInfo) { +static void setGfx10BaseInfo(TargetInfo *targetInfo) { // Initial settings (could be adjusted later according to graphics IP version info) targetInfo->getGpuProperty().waveSize = 64; @@ -80,63 +80,11 @@ static void setGfx9BaseInfo(TargetInfo *targetInfo) { targetInfo->getGpuProperty().numShaderEngines = 4; } -// gfx9 -// -// @param [in/out] targetInfo : Target info -static void 
setGfx9Info(TargetInfo *targetInfo) { - setGfx9BaseInfo(targetInfo); - - targetInfo->getGpuProperty().supportsXnack = 1; - - // TODO: Clean up code for all 1d texture patch - targetInfo->getGpuWorkarounds().gfx9.treat1dImagesAs2d = 1; - - targetInfo->getGpuWorkarounds().gfx9.shaderImageGatherInstFix = 1; - - targetInfo->getGpuWorkarounds().gfx9.fixCacheLineStraddling = 1; -} - -// gfx900 -// -// @param [in/out] targetInfo : Target info -static void setGfx900Info(TargetInfo *targetInfo) { - setGfx9Info(targetInfo); - targetInfo->getGpuWorkarounds().gfx9.fixLsVgprInput = 1; -} - -// gfx902 -// -// @param [in/out] targetInfo : Target info -static void setGfx902Info(TargetInfo *targetInfo) { - setGfx900Info(targetInfo); - targetInfo->getGpuProperty().supportsRbPlus = 1; -} - -// gfx904 -// -// @param [in/out] targetInfo : Target info -static void setGfx904Info(TargetInfo *targetInfo) { - setGfx9Info(targetInfo); - targetInfo->getGpuProperty().supportsRbPlus = 1; -} - -// gfx906 -// -// @param [in/out] targetInfo : Target info -static void setGfx906Info(TargetInfo *targetInfo) { - setGfx9Info(targetInfo); - - targetInfo->getGpuProperty().supportIntegerDotFlag.compBitwidth16 = true; - targetInfo->getGpuProperty().supportIntegerDotFlag.compBitwidth8 = true; - targetInfo->getGpuProperty().supportIntegerDotFlag.compBitwidth4 = true; - targetInfo->getGpuProperty().supportIntegerDotFlag.sameSignedness = true; -} - // gfx10 // // @param [in/out] targetInfo : Target info static void setGfx10Info(TargetInfo *targetInfo) { - setGfx9BaseInfo(targetInfo); + setGfx10BaseInfo(targetInfo); targetInfo->getGpuProperty().maxSgprsAvailable = 106; targetInfo->getGpuProperty().supportsPermLane16 = true; @@ -382,14 +330,6 @@ bool TargetInfo::setTargetInfo(StringRef gpuName) { }; static const GpuNameStringMap GpuNameMap[] = { - {"gfx900", &setGfx900Info}, // gfx900 - {"gfx901", &setGfx9Info}, // gfx901 - {"gfx902", &setGfx902Info}, // gfx902 - {"gfx903", &setGfx9Info}, // gfx903 - {"gfx904", &setGfx904Info}, // gfx904, vega12 - {"gfx906", &setGfx906Info}, // gfx906, vega20 - {"gfx909", &setGfx904Info}, // gfx909, raven2 - {"gfx90c", &setGfx9Info}, // gfx90c {"gfx1010", &setGfx1010Info}, // gfx1010 {"gfx1011", &setGfx1011Info}, // gfx1011, navi12 {"gfx1012", &setGfx1012Info}, // gfx1012, navi14 diff --git a/lgc/test/FetchShaderSingleInput.lgc b/lgc/test/FetchShaderSingleInput.lgc deleted file mode 100644 index b731ce15fc..0000000000 --- a/lgc/test/FetchShaderSingleInput.lgc +++ /dev/null @@ -1,151 +0,0 @@ -; Test building the vertex shader by itself. - -; RUN: lgc -mcpu=gfx1010 -extract=2 - <%s | FileCheck --check-prefixes=VS-ISA,VS-NGG-ISA %s -; VS-ISA: .p2align 8 -; VS-NGG-ISA-LABEL: _amdgpu_gs_main_fetchless: -; VS-ISA: exp pos0 -; VS-ISA: s_endpgm -; VS-ISA: .vertexInputs: -; VS-ISA: - 0 -; VS-ISA: - 0 -; VS-ISA: - v4f32 - -; Test building the vertex and fragment shaders to separate ELFs, then building the fetch shader to ISA. - -; RUN: lgc -mcpu=gfx1010 -extract=2 -filetype=obj -o %t.vs.elf - <%s && lgc -mcpu=gfx1010 -extract=3 -filetype=obj -o %t.fs.elf - <%s && lgc -mcpu=gfx1010 -extract=1 -o - -l -glue=1 %s %t.vs.elf %t.fs.elf | FileCheck -check-prefixes=FETCH-ISA,FETCH-ISA-NGG %s -; FETCH-ISA: .p2align 8 -; FETCH-ISA-NGG: _amdgpu_gs_main: -; FETCH-ISA-NGG: s_bfe_u32 {{.*}}, s3, 0x70000 -; FETCH-ISA-NGG: s_bfm_b32 exec -; FETCH-ISA-NGG: tbuffer_load_format_xyz v[9:11], -; Not expecting s_endpgm on a fetch shader. 
-; FETCH-ISA-NOT: s_endpgm - -; ---------------------------------------------------------------------- -; Extract 1: The pipeline state with no shaders. - -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7" -target triple = "amdgcn--amdpal" - -!lgc.options = !{!0} -!lgc.options.VS = !{!1} -!lgc.options.FS = !{!2} -!lgc.user.data.nodes = !{!3, !4, !5, !6, !7} -!lgc.vertex.inputs = !{!8, !9, !10, !11} -!lgc.color.export.formats = !{!12} -!lgc.input.assembly.state = !{!13} -!lgc.viewport.state = !{!14} -!lgc.rasterizer.state = !{!15} - -!0 = !{i32 -1094458452, i32 -1026392042, i32 2073992001, i32 497582744, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2} -!1 = !{i32 -1960408933, i32 578719886, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 64, i32 0, i32 15, i32 3} -!2 = !{i32 -1498760258, i32 545756883, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 64, i32 0, i32 15, i32 3} -!3 = !{!"DescriptorTableVaPtr", i32 0, i32 0, i32 0, i32 1, i32 3} -!4 = !{!"DescriptorBuffer", i32 6, i32 0, i32 0, i32 4, i32 0, i32 0, i32 4} -!5 = !{!"DescriptorCombinedTexture", i32 3, i32 0, i32 4, i32 12, i32 0, i32 1, i32 12} -!6 = !{!"DescriptorBuffer", i32 6, i32 0, i32 16, i32 4, i32 0, i32 2, i32 4} -!7 = !{!"IndirectUserDataVaPtr", i32 0, i32 0, i32 1, i32 1, i32 4} -!8 = !{i32 0, i32 0, i32 0, i32 44, i32 13, i32 7, i32 -1} -!9 = !{i32 1, i32 0, i32 12, i32 44, i32 11, i32 7, i32 -1} -!10 = !{i32 2, i32 0, i32 20, i32 44, i32 13, i32 7, i32 -1} -!11 = !{i32 3, i32 0, i32 32, i32 44, i32 13, i32 7, i32 -1} -!12 = !{i32 16, i32 0, i32 0, i32 1} -!13 = !{i32 2, i32 3} -!14 = !{i32 1} -!15 = !{i32 0, i32 0, i32 0, i32 1} -!16 = !{i32 0} -!17 = !{i32 4} - -; ---------------------------------------------------------------------- -; Extract 2: The vertex shader - -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7" -target triple = "amdgcn--amdpal" - -define dllexport spir_func void @lgc.shader.VS.main() local_unnamed_addr #0 !lgc.shaderstage !5 { -.entry: - %0 = call ptr addrspace(7) @lgc.load.buffer.desc(i64 0, i32 0, i32 0, i32 0) - %1 = call {}* @llvm.invariant.start.p7i8(i64 -1, i8 addrspace(7)* %0) - %2 = call <4 x float> (...) 
@lgc.create.read.generic.input.v4f32(i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison) - %3 = bitcast i8 addrspace(7)* %0 to <4 x float> addrspace(7)* - %4 = load <4 x float>, <4 x float> addrspace(7)* %3, align 16 - %5 = insertvalue [4 x <4 x float>] undef, <4 x float> %4, 0 - %6 = getelementptr inbounds i8, i8 addrspace(7)* %0, i64 16 - %7 = bitcast i8 addrspace(7)* %6 to <4 x float> addrspace(7)* - %8 = load <4 x float>, <4 x float> addrspace(7)* %7, align 16 - %9 = insertvalue [4 x <4 x float>] %5, <4 x float> %8, 1 - %10 = getelementptr inbounds i8, i8 addrspace(7)* %0, i64 32 - %11 = bitcast i8 addrspace(7)* %10 to <4 x float> addrspace(7)* - %12 = load <4 x float>, <4 x float> addrspace(7)* %11, align 16 - %13 = insertvalue [4 x <4 x float>] %9, <4 x float> %12, 2 - %14 = getelementptr inbounds i8, i8 addrspace(7)* %0, i64 48 - %15 = bitcast i8 addrspace(7)* %14 to <4 x float> addrspace(7)* - %16 = load <4 x float>, <4 x float> addrspace(7)* %15, align 16 - %17 = insertvalue [4 x <4 x float>] %13, <4 x float> %16, 3 - %18 = insertelement <4 x float> %2, float 1.000000e+00, i32 3 - %19 = call <4 x float> (...) @lgc.create.matrix.times.vector.v4f32([4 x <4 x float>] %17, <4 x float> %18) - call void (...) @lgc.create.write.builtin.output(<4 x float> %19, i32 0, i32 0, i32 poison, i32 poison) - ret void -} - -; Function Attrs: nounwind -declare <4 x float> @lgc.create.matrix.times.vector.v4f32(...) local_unnamed_addr #0 - -; Function Attrs: nounwind readonly -declare <4 x float> @lgc.create.read.generic.input.v4f32(...) local_unnamed_addr #1 - -; Function Attrs: nounwind -declare void @lgc.create.write.builtin.output(...) local_unnamed_addr #0 - -; Function Attrs: nounwind -declare ptr addrspace(7) @lgc.load.buffer.desc(i64, i32, i32, i32) local_unnamed_addr #0 - -; Function Attrs: argmemonly nounwind willreturn -declare {}* @llvm.invariant.start.p7i8(i64 immarg, i8 addrspace(7)* nocapture) #2 - -attributes #0 = { nounwind } -attributes #1 = { nounwind readonly } -attributes #2 = { argmemonly nounwind willreturn } - -!lgc.unlinked = !{!0} -!lgc.options = !{!1} -!lgc.options.VS = !{!2} -!lgc.user.data.nodes = !{!6, !7, !8, !9, !10} - -!0 = !{i32 1} -!1 = !{i32 -622916388, i32 -2087703020, i32 1994742363, i32 -303531948, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2} -!2 = !{i32 -1960408933, i32 578719886, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 64, i32 0, i32 0, i32 3} -!5 = !{i32 1} -!6 = !{!"DescriptorTableVaPtr", i32 0, i32 0, i32 0, i32 1, i32 3} -!7 = !{!"DescriptorBuffer", i32 6, i32 0, i32 0, i32 4, i32 0, i32 0, i32 4} -!8 = !{!"DescriptorCombinedTexture", i32 3, i32 0, i32 4, i32 12, i32 0, i32 1, i32 12} -!9 = !{!"DescriptorBuffer", i32 6, i32 0, i32 16, i32 4, i32 0, i32 2, i32 4} -!10 = !{!"IndirectUserDataVaPtr", i32 0, i32 0, i32 1, i32 1, i32 4} - -; ---------------------------------------------------------------------- -; Extract 3: The fragment shader - -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7" -target triple = "amdgcn--amdpal" - -define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !lgc.shaderstage !5 { -.entry: - call void (...) @lgc.create.write.generic.output(<4 x float> , i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison) - ret void -} - -declare void @lgc.create.write.generic.output(...) 
local_unnamed_addr #0 - -attributes #0 = { nounwind } - -!lgc.unlinked = !{!0} -!lgc.options = !{!1} -!lgc.options.FS = !{!2} -!lgc.color.export.formats = !{!3} - -!0 = !{i32 1} -!1 = !{i32 1741946712, i32 -2129783189, i32 -1703433192, i32 1078647447, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2} -!2 = !{i32 -1498760258, i32 545756883, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 64, i32 0, i32 0, i32 3} -!3 = !{i32 14, i32 7} -!5 = !{i32 6} - diff --git a/lgc/test/IntVectorVertexInput.lgc b/lgc/test/IntVectorVertexInput.lgc deleted file mode 100644 index 7d0aa77fe9..0000000000 --- a/lgc/test/IntVectorVertexInput.lgc +++ /dev/null @@ -1,106 +0,0 @@ -; Test with a vector of integers are a vertex attribute input. - -; RUN: lgc -mcpu=gfx1010 -extract=2 -filetype=obj -o %t.vs.elf - <%s && lgc -mcpu=gfx1010 -extract=3 -filetype=obj -o %t.fs.elf - <%s && lgc -mcpu=gfx1010 -extract=1 -o - -l -glue=1 %s %t.vs.elf %t.fs.elf | FileCheck -check-prefixes=FETCH-ISA %s -; FETCH-ISA: tbuffer_load_format_xyzw v[{{[0-9]+:[0-9]+}}], - -; ---------------------------------------------------------------------- -; Extract 1: The pipeline state with no shaders. - -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7" -target triple = "amdgcn--amdpal" - -!lgc.options = !{!0} -!lgc.options.VS = !{!1} -!lgc.options.FS = !{!2} -!lgc.user.data.nodes = !{!3, !4, !5, !6} -!lgc.vertex.inputs = !{!7, !8, !9, !10, !11, !12} -!lgc.color.export.formats = !{!13} -!lgc.input.assembly.state = !{!14} -!lgc.viewport.state = !{!15} -!lgc.rasterizer.state = !{!16} - -!0 = !{i32 -1620978931, i32 620550714, i32 -100642976, i32 -196492550, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2} -!1 = !{i32 1951548461, i32 273960056, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 64, i32 0, i32 15, i32 3} -!2 = !{i32 1072849668, i32 -352651751, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 64, i32 0, i32 15, i32 3} -!3 = !{!"DescriptorTableVaPtr", i32 0, i32 0, i32 0, i32 1, i32 2} -!4 = !{!"DescriptorBuffer", i32 6, i32 0, i32 0, i32 4, i32 0, i32 0, i32 4} -!5 = !{!"DescriptorCombinedTexture", i32 3, i32 0, i32 4, i32 12, i32 0, i32 1, i32 12} -!6 = !{!"IndirectUserDataVaPtr", i32 0, i32 0, i32 1, i32 1, i32 4} -!7 = !{i32 0, i32 0, i32 0, i32 76, i32 13, i32 7, i32 -1} -!8 = !{i32 1, i32 0, i32 12, i32 76, i32 13, i32 7, i32 -1} -!9 = !{i32 2, i32 0, i32 24, i32 76, i32 11, i32 7, i32 -1} -!10 = !{i32 3, i32 0, i32 32, i32 76, i32 13, i32 7, i32 -1} -!11 = !{i32 4, i32 0, i32 44, i32 76, i32 14, i32 7, i32 -1} -!12 = !{i32 5, i32 0, i32 60, i32 76, i32 14, i32 5, i32 -1} -!13 = !{i32 16} -!14 = !{i32 2, i32 3} -!15 = !{i32 1} -!16 = !{i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 2, i32 1} -!17 = !{i32 0} -!18 = !{i32 4} - -; ---------------------------------------------------------------------- -; Extract 2: The vertex shader - -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7" -target triple = "amdgcn--amdpal" - -; Function Attrs: nounwind -define dllexport spir_func void @lgc.shader.VS.main() local_unnamed_addr #0 !lgc.shaderstage !0 { -.entry: - %0 = call <4 x i32> (...) 
@lgc.create.read.generic.input.v4i32(i32 5, i32 0, i32 0, i32 0, i32 0, i32 undef) - %bc = bitcast <4 x i32> %0 to <4 x float> - %1 = shufflevector <4 x float> %bc, <4 x float> undef, <3 x i32> - call void (...) @lgc.create.write.generic.output(<3 x float> %1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef) - ret void -} - -; Function Attrs: nounwind readonly -declare <4 x i32> @lgc.create.read.generic.input.v4i32(...) local_unnamed_addr #1 - -; Function Attrs: nounwind -declare void @lgc.create.write.generic.output(...) local_unnamed_addr #0 - -attributes #0 = { nounwind } -attributes #1 = { nounwind readonly } - -!lgc.unlinked = !{!0} -!lgc.options = !{!1} -!lgc.options.VS = !{!2} -!lgc.input.assembly.state = !{!4} - -!0 = !{i32 1} -!1 = !{i32 628083063, i32 1661573491, i32 -2141117829, i32 766255606, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2} -!2 = !{i32 1951548461, i32 273960056, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 64, i32 0, i32 0, i32 3} -!4 = !{i32 2, i32 3} - -; ---------------------------------------------------------------------- -; Extract 3: The fragment shader - -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7" -target triple = "amdgcn--amdpal" - -; Function Attrs: nounwind -define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !lgc.shaderstage !5 { -.entry: - call void (...) @lgc.create.write.generic.output(<4 x float> , i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef) - ret void -} - -; Function Attrs: nounwind -declare void @lgc.create.write.generic.output(...) local_unnamed_addr #0 - -attributes #0 = { nounwind } - -!lgc.unlinked = !{!0} -!lgc.options = !{!1} -!lgc.options.FS = !{!2} -!lgc.color.export.formats = !{!3} -!lgc.input.assembly.state = !{!4} - -!0 = !{i32 1} -!1 = !{i32 -794913950, i32 -27741903, i32 1278784547, i32 441582842, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2} -!2 = !{i32 1072849668, i32 -352651751, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 64, i32 0, i32 0, i32 3} -!3 = !{i32 14, i32 7} -!4 = !{i32 0, i32 3} -!5 = !{i32 6} diff --git a/lgc/test/PhiWithArgument.lgc b/lgc/test/PhiWithArgument.lgc index d81b058b83..f7faddbcf5 100644 --- a/lgc/test/PhiWithArgument.lgc +++ b/lgc/test/PhiWithArgument.lgc @@ -47,37 +47,23 @@ attributes #1 = { nounwind readonly } !2 = !{!"PushConst", i32 9, i32 0, i32 0, i32 18, i32 0, i32 0, i32 0} !10 = !{i32 1} -; VS-ISA-LABEL: amdgpu_gs_main_fetchless: +; VS-ISA-LABEL: amdgpu_gs_main: ; VS-ISA: s_mov_b32 exec_lo, -1 -; VS-ISA-NEXT: s_bfe_u32 s3, s3, 0x40018 -; VS-ISA-NEXT: s_bfe_u32 s1, s2, 0x90016 -; VS-ISA-NEXT: s_bfe_u32 s0, s2, 0x9000c -; VS-ISA-NEXT: s_cmp_lg_u32 s3, 0 +; VS-ISA-NEXT: s_bfe_u32 s1, s3, 0x40018 +; VS-ISA-NEXT: s_bfe_u32 s0, s2, 0x90016 +; VS-ISA-NEXT: s_cmp_lg_u32 s1, 0 ; VS-ISA-NEXT: s_barrier ; VS-ISA-NEXT: s_cbranch_scc1 .LBB0_2 -; VS-ISA-NEXT: s_lshl_b32 s2, s1, 12 -; VS-ISA-NEXT: s_or_b32 m0, s2, s0 +; VS-ISA-NEXT: s_bfe_u32 s2, s2, 0x9000c +; VS-ISA-NEXT: s_lshl_b32 s3, s0, 12 +; VS-ISA-NEXT: s_or_b32 m0, s3, s2 ; VS-ISA-NEXT: s_sendmsg sendmsg(MSG_GS_ALLOC_REQ) ; VS-ISA-NEXT: .LBB0_2: ; VS-ISA-NEXT: v_mbcnt_lo_u32_b32 v1, -1, 0 -; VS-ISA-NEXT: v_lshl_or_b32 v1, s3, 5, v1 -; VS-ISA-NEXT: v_cmp_gt_u32_e32 vcc_lo, s1, v1 -; VS-ISA-NEXT: s_and_saveexec_b32 s1, vcc_lo +; VS-ISA-NEXT: v_lshl_or_b32 v1, s1, 5, v1 +; VS-ISA-NEXT: v_cmp_gt_u32_e32 
vcc_lo, s0, v1 +; VS-ISA-NEXT: s_and_saveexec_b32 s0, vcc_lo ; VS-ISA-NEXT: s_cbranch_execz .LBB0_4 ; VS-ISA-NEXT: exp prim v0, off, off, off done ; VS-ISA-NEXT: .LBB0_4: -; VS-ISA-NEXT: s_waitcnt expcnt(0) -; VS-ISA-NEXT: s_or_b32 exec_lo, exec_lo, s1 -; VS-ISA-NEXT: v_cmp_gt_u32_e32 vcc_lo, s0, v1 -; VS-ISA-NEXT: s_and_saveexec_b32 s0, vcc_lo -; VS-ISA-NEXT: s_cbranch_execz .LBB0_6 -; VS-ISA-NEXT: v_mov_b32_e32 v0, 1.0 -; VS-ISA-NEXT: v_mov_b32_e32 v1, 0 -; VS-ISA-NEXT: v_cmp_eq_f32_e32 vcc_lo, 1.0, v9 -; VS-ISA-NEXT: exp pos0 v1, v1, v1, v0 done -; VS-ISA-NEXT: v_cndmask_b32_e64 v2, v9, s9, vcc_lo -; VS-ISA-NEXT: v_cndmask_b32_e64 v3, v10, s10, vcc_lo -; VS-ISA-NEXT: v_cndmask_b32_e64 v4, v11, s11, vcc_lo -; VS-ISA-NEXT: exp param1 v2, v3, v4, off -; VS-ISA-NEXT: .LBB0_6: ; VS-ISA-NEXT: s_endpgm diff --git a/lgc/test/TaskShaderOps.lgc b/lgc/test/TaskShaderOps.lgc index 6b30ccbcaf..04ee28a150 100644 --- a/lgc/test/TaskShaderOps.lgc +++ b/lgc/test/TaskShaderOps.lgc @@ -17,9 +17,9 @@ ; CHECK-NEXT: [[tempResult2:%[0-9]*]] = mul i32 [[tempResult1]], [[dimX]] ; CHECK-NEXT: [[flattenId:%[0-9]*]] = add i32 [[tempResult2]], [[groupIdX]] ; CHECK-NEXT: [[entryIndex:%[0-9]*]] = add i32 [[flattenId]], %meshTaskRingIndex -; CHECK: [[drawDataRingDescPtr:%[0-9]*]] = getelementptr <4 x i32>, ptr addrspace(4) %{{[0-9]*}}, i64 14 +; CHECK: [[drawDataRingDescPtr:%[0-9]*]] = getelementptr {{i8|<4 x i32>}}, ptr addrspace(4) %{{[0-9]*}}, i64 {{224|14}} ; CHECK-NEXT: [[drawDataRingDesc:%[0-9]*]] = load <4 x i32>, ptr addrspace(4) [[drawDataRingDescPtr]], align 16 -; CHECK: [[payloadRingDescPtr:%[0-9]*]] = getelementptr <4 x i32>, ptr addrspace(4) %{{[0-9]*}}, i64 13 +; CHECK: [[payloadRingDescPtr:%[0-9]*]] = getelementptr {{i8|<4 x i32>}}, ptr addrspace(4) %{{[0-9]*}}, i64 {{208|13}} ; CHECK-NEXT: [[payloadRingDesc:%[0-9]*]] = load <4 x i32>, ptr addrspace(4) [[payloadRingDescPtr]], align 16 ; CHECK: [[ringSize:%[0-9]*]] = extractelement <4 x i32> [[payloadRingDesc]], i64 2 ; CHECK-NEXT: [[numEntries:%[0-9]*]] = lshr i32 [[ringSize]], 14 diff --git a/lgc/test/UnlinkedTessFetches.lgc b/lgc/test/UnlinkedTessFetches.lgc deleted file mode 100644 index 29420f387c..0000000000 --- a/lgc/test/UnlinkedTessFetches.lgc +++ /dev/null @@ -1,69 +0,0 @@ -; Test that the fetches are added to the merged Vs-Tcs shader. - -; RUN: lgc -mcpu=gfx1010 --print-after=lgc-patch-prepare-pipeline-abi -o - - <%s 2>&1 | FileCheck --check-prefixes=CHECK %s - -; Find the second run of Patch LLVM for preparing pipeline ABI -; CHECK: IR Dump After Patch LLVM for preparing pipeline ABI -; CHECK: define dllexport amdgpu_hs void @_amdgpu_hs_main_fetchless({{.*}}, <4 x float> noundef [[vertInput:%[0-9]*]]) -; CHECK: call amdgpu_ls void @_amdgpu_ls_main_fetchless({{.*}}, <4 x float> [[vertInput]]) -; CHECK: define internal{{.*}} amdgpu_ls void @_amdgpu_ls_main_fetchless({{.*}}, <4 x float> noundef %vertex{{[0-9]*.[0-9]*}}) - -; ModuleID = 'lgcPipeline' -source_filename = "lgcPipeline" -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" -target triple = "amdgcn--amdpal" - -; Function Attrs: nounwind -define dllexport spir_func void @lgc.shader.VS.VkMain() local_unnamed_addr #0 !lgc.shaderstage !2 { -.entry: - %0 = call <4 x float> (...) @lgc.create.read.generic.input.v4f32(i32 2, i32 0, i32 0, i32 0, i32 0, i32 undef) - call void (...) 
@lgc.create.write.generic.output(<4 x float> %0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 undef) - ret void -} - -; Function Attrs: nounwind readonly willreturn -declare <4 x float> @lgc.create.read.generic.input.v4f32(...) local_unnamed_addr #1 - -; Function Attrs: nounwind -declare void @lgc.create.write.generic.output(...) local_unnamed_addr #0 - -; Function Attrs: nounwind -define dllexport spir_func void @lgc.shader.TCS.VkMain() local_unnamed_addr #0 !lgc.shaderstage !10 { -.entry: - ret void -} - -; Function Attrs: nounwind -define dllexport spir_func void @lgc.shader.TES.VkMain() local_unnamed_addr #0 !lgc.shaderstage !13 { -.entry: - ret void -} - -attributes #0 = { nounwind } -attributes #1 = { nounwind readonly willreturn } - -!llpc.tcs.mode = !{!0} -!lgc.client = !{!1} -!lgc.unlinked = !{!2} -!lgc.options = !{!3} -!lgc.options.VS = !{!4} -!lgc.options.TCS = !{!5} -!lgc.options.TES = !{!6} -!lgc.input.assembly.state = !{!7} -!lgc.rasterizer.state = !{!8} -!amdgpu.pal.metadata.msgpack = !{!9} - -!0 = !{i32 3, i32 2, i32 1, i32 0, i32 3} -!1 = !{!"Vulkan"} -!2 = !{i32 1} -!3 = !{i32 594335421, i32 -791667120, i32 2071897816, i32 -1510553338, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2} -!4 = !{i32 -1926247415, i32 372224270, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 64, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 20, i32 1800} -!5 = !{i32 445265959, i32 1501682766, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 64, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 20, i32 1800} -!6 = !{i32 2021703860, i32 1144599744, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 64, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 20, i32 1800} -!7 = !{i32 5, i32 3} -!8 = !{i32 0, i32 0, i32 0, i32 1} -!9 = !{!"\82\B0amdpal.pipelines\91\84\AA.registers\80\B0.spill_threshold\CE\FF\FF\FF\FF\B0.user_data_limit\00\AF.xgl_cache_info\82\B3.128_bit_cache_hash\92\CF\87kq8\12\B1v`\CF\BC\B6Q\EB \00\F2\B2\AD.llpc_version\A450.0\AEamdpal.version\92\02\03"} -!10 = !{i32 2} -!11 = distinct !{!11} -!12 = distinct !{!12} -!13 = !{i32 3} diff --git a/lgc/test/UnlinkedVsGsInputs.lgc b/lgc/test/UnlinkedVsGsInputs.lgc deleted file mode 100644 index 68090ad11e..0000000000 --- a/lgc/test/UnlinkedVsGsInputs.lgc +++ /dev/null @@ -1,80 +0,0 @@ -; Check that after merging the VS and GS shader the result has the vertex input as the last parameter, and it is being passed -; to the vertex shader, which expects it as the last parameter. 
-; RUN: lgc -mcpu=gfx1010 %s -o /dev/null -print-after=lgc-patch-prepare-pipeline-abi 2>&1 | FileCheck --check-prefixes=CHECK %s -; CHECK: IR Dump After Patch LLVM for preparing pipeline ABI on [module] -; CHECK: define dllexport amdgpu_gs void @_amdgpu_gs_main_fetchless({{.*}}, <2 x float> noundef [[vertInput:%[0-9]*]]) -; CHECK: call amdgpu_es void @_amdgpu_es_main_fetchless({{.*}}, <2 x float> [[vertInput]]) -; CHECK: define internal{{.*}} amdgpu_es void @_amdgpu_es_main_fetchless({{.*}}, <2 x float> noundef %vertex0.0) - -; ModuleID = 'lgcPipeline' -source_filename = "lgcPipeline" -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" -target triple = "amdgcn--amdpal" - -; Function Attrs: nounwind -define dllexport spir_func void @lgc.shader.VS.main() local_unnamed_addr #0 !lgc.shaderstage !2 { -.entry: - %0 = call <2 x float> (...) @lgc.create.read.generic.input.v2f32(i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison) - call void (...) @lgc.create.write.generic.output(<2 x float> %0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison) - ret void -} - -; Function Attrs: nounwind readonly willreturn -declare <2 x float> @lgc.create.read.generic.input.v2f32(...) local_unnamed_addr #1 - -; Function Attrs: nounwind -declare void @lgc.create.write.generic.output(...) local_unnamed_addr #0 - -; Function Attrs: nounwind -define dllexport spir_func void @lgc.shader.GS.main() local_unnamed_addr #0 !lgc.shaderstage !9 { -.entry: - %0 = call <4 x float> (...) @lgc.create.read.builtin.input.v4f32(i32 0, i32 0, i32 0, i32 poison) - %1 = call <4 x float> (...) @lgc.create.read.builtin.input.v4f32(i32 0, i32 0, i32 1, i32 poison) - %2 = call <4 x float> (...) @lgc.create.read.builtin.input.v4f32(i32 0, i32 0, i32 2, i32 poison) - call void (...) @lgc.create.write.builtin.output(<4 x float> %0, i32 0, i32 1024, i32 poison, i32 poison) - call void (...) @lgc.create.write.builtin.output(float undef, i32 1, i32 1024, i32 poison, i32 poison) - call void (...) @lgc.create.write.builtin.output([1 x float] undef, i32 3, i32 5120, i32 poison, i32 poison) - call void (...) @lgc.create.emit.vertex(i32 0) - call void (...) @lgc.create.write.builtin.output(<4 x float> %1, i32 0, i32 1024, i32 poison, i32 poison) - call void (...) @lgc.create.write.builtin.output(float undef, i32 1, i32 1024, i32 poison, i32 poison) - call void (...) @lgc.create.write.builtin.output([1 x float] undef, i32 3, i32 5120, i32 poison, i32 poison) - call void (...) @lgc.create.emit.vertex(i32 0) - call void (...) @lgc.create.write.builtin.output(<4 x float> %2, i32 0, i32 1024, i32 poison, i32 poison) - call void (...) @lgc.create.write.builtin.output(float undef, i32 1, i32 1024, i32 poison, i32 poison) - call void (...) @lgc.create.write.builtin.output([1 x float] undef, i32 3, i32 5120, i32 poison, i32 poison) - call void (...) @lgc.create.emit.vertex(i32 0) - ret void -} - -; Function Attrs: nounwind readonly willreturn -declare <4 x float> @lgc.create.read.builtin.input.v4f32(...) local_unnamed_addr #1 - -; Function Attrs: nounwind -declare void @lgc.create.write.builtin.output(...) local_unnamed_addr #0 - -; Function Attrs: nounwind -declare void @lgc.create.emit.vertex(...) 
local_unnamed_addr #0 - -attributes #0 = { nounwind } -attributes #1 = { nounwind readonly willreturn } - -!llpc.geometry.mode = !{!0} -!lgc.client = !{!1} -!lgc.unlinked = !{!2} -!lgc.options = !{!3} -!lgc.options.VS = !{!4} -!lgc.options.GS = !{!5} -!lgc.input.assembly.state = !{!6} -!lgc.rasterizer.state = !{!7} -!amdgpu.pal.metadata.msgpack = !{!8} - -!0 = !{i32 3, i32 2, i32 1, i32 3} -!1 = !{!"Vulkan"} -!2 = !{i32 1} -!3 = !{i32 820310003, i32 2145276876, i32 -448892402, i32 165796324, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2} -!4 = !{i32 -314154943, i32 -596910625, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 64, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 20, i32 1800} -!5 = !{i32 -701582401, i32 -711308304, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 64, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 20, i32 1800} -!6 = !{i32 2, i32 3} -!7 = !{i32 0, i32 0, i32 0, i32 1} -!8 = !{!"\82\B0amdpal.pipelines\91\84\AA.registers\80\B0.spill_threshold\CE\FF\FF\FF\FF\B0.user_data_limit\00\AF.xgl_cache_info\82\B3.128_bit_cache_hash\92\CF:\0F#\A0\01\E1s\E3\CFi3\02\D8\80\DD\FE\85\AD.llpc_version\A449.0\AEamdpal.version\92\02\03"} -!9 = !{i32 4} diff --git a/lgc/util/GfxRegHandler.cpp b/lgc/util/GfxRegHandler.cpp index 7bc615f599..3d764c2951 100644 --- a/lgc/util/GfxRegHandler.cpp +++ b/lgc/util/GfxRegHandler.cpp @@ -113,7 +113,6 @@ SqImgSampRegHandler::SqImgSampRegHandler(IRBuilder<> *builder, Value *reg, GfxIp m_gfxIpVersion = gfxIpVersion; switch (gfxIpVersion->major) { - case 9: case 10: case 11: m_bitsInfo = SqImgSampRegBitsGfx9; @@ -161,29 +160,6 @@ void SqImgSampRegHandler::setReg(SqSampRegs regId, Value *regValue) { } } -// ===================================================================================================================== -// SqImgSampReg Bits information look up table (Gfx9) -// Refer to imported/chip/gfx9/gfx9_plus_merged_registers.h : SQ_IMG_RSRC_WORD -static constexpr BitsInfo SqImgRsrcRegBitsGfx9[static_cast(SqRsrcRegs::Count)] = { - {0, 0, 32}, // BaseAddress - {1, 0, 8}, // BaseAddressHi - {1, 20, 9}, // Format - {2, 0, 14}, // Width - {2, 14, 14}, // Height - {3, 0, 12}, // DstSelXYZW - {3, 20, 5}, // SwizzleMode - {3, 28, 4}, // Type - {4, 0, 13}, // Depth - {4, 13, 12}, // Pitch - {4, 29, 3}, // BcSwizzle - {3, 12, 4}, // BaseLevel - {3, 16, 4}, // LastLevel - {5, 0, 13}, // BaseArray - {}, // WidthLo - {}, // WidthHi - {}, // ArrayPitch -}; - // ===================================================================================================================== // SqImgSampReg Bits information look up table (Gfx10) // TODO: update comment when the registers file is available @@ -241,9 +217,6 @@ SqImgRsrcRegHandler::SqImgRsrcRegHandler(IRBuilder<> *builder, Value *reg, GfxIp m_gfxIpVersion = gfxIpVersion; switch (gfxIpVersion->major) { - case 9: - m_bitsInfo = SqImgRsrcRegBitsGfx9; - break; case 10: m_bitsInfo = SqImgRsrcRegBitsGfx10; break; @@ -280,8 +253,6 @@ Value *SqImgRsrcRegHandler::getReg(SqRsrcRegs regId) { return m_builder->CreateAdd(getRegCommon(static_cast(regId)), m_one); case SqRsrcRegs::Width: switch (m_gfxIpVersion->major) { - case 9: - return m_builder->CreateAdd(getRegCommon(static_cast(regId)), m_one); case 10: case 11: return m_builder->CreateAdd( @@ -320,9 +291,6 @@ void SqImgRsrcRegHandler::setReg(SqRsrcRegs regId, Value *regValue) { break; case SqRsrcRegs::Width: switch (m_gfxIpVersion->major) { - case 9: - 
setRegCommon(static_cast(regId), m_builder->CreateSub(regValue, m_one)); - break; case 10: case 11: setRegCombine(static_cast(SqRsrcRegs::WidthLo), static_cast(SqRsrcRegs::WidthHi), diff --git a/llpc/CMakeLists.txt b/llpc/CMakeLists.txt index 0fe1470c68..10faa4a964 100644 --- a/llpc/CMakeLists.txt +++ b/llpc/CMakeLists.txt @@ -36,6 +36,7 @@ endif() option(LLPC_BUILD_TESTS "LLPC build all tests" OFF) option(LLPC_BUILD_LLVM_TOOLS "Build LLVM tools" OFF) option(LLPC_ENABLE_WERROR "Build LLPC with more errors" OFF) +option(LLPC_DISABLE_SPVGEN "Build LLPC tools without SPVGEN" OFF) if(ICD_BUILD_LLPC) set(AMDLLPC_DIR ${CMAKE_CURRENT_BINARY_DIR}) @@ -225,6 +226,7 @@ if(ICD_BUILD_LLPC) lower/llpcSpirvLowerInternalLibraryIntrinsicUtil.cpp lower/LowerGLCompatibility.cpp lower/llpcSpirvLowerCooperativeMatrix.cpp + lower/PrepareContinuations.cpp ) # llpc/translator @@ -357,6 +359,11 @@ target_compile_definitions(llpc_standalone_compiler PUBLIC ${TARGET_ARCHITECTURE_ENDIANESS}ENDIAN_CPU _SPIRV_LLVM_API ) + +if (LLPC_DISABLE_SPVGEN) + target_compile_definitions(llpc_standalone_compiler PUBLIC LLPC_DISABLE_SPVGEN) +endif() + if (LLPC_CLIENT_INTERFACE_MAJOR_VERSION) target_compile_definitions(llpc_standalone_compiler PUBLIC PAL_CLIENT_INTERFACE_MAJOR_VERSION=${PAL_CLIENT_INTERFACE_MAJOR_VERSION} @@ -383,13 +390,16 @@ target_link_libraries(llpc_standalone_compiler PUBLIC cwpack llpc metrohash - spvgen_static vfx vkgc_headers vkgc_util ${CMAKE_DL_LIBS} ) +if (NOT LLPC_DISABLE_SPVGEN) + target_link_libraries(llpc_standalone_compiler PUBLIC spvgen_static) +endif() + if (NOT LLVM_LINK_LLVM_DYLIB) llvm_map_components_to_libnames(llvm_libs AsmParser diff --git a/llpc/context/llpcCompiler.cpp b/llpc/context/llpcCompiler.cpp index 7110ec7112..457419a2e0 100644 --- a/llpc/context/llpcCompiler.cpp +++ b/llpc/context/llpcCompiler.cpp @@ -30,6 +30,7 @@ */ #include "llpcCompiler.h" #include "LLVMSPIRVLib.h" +#include "PrepareContinuations.h" #include "SPIRVEntry.h" #include "SPIRVFunction.h" #include "SPIRVInstruction.h" @@ -49,7 +50,6 @@ #include "llpcSpirvLowerRayTracing.h" #include "llpcSpirvLowerTranslator.h" #include "llpcSpirvLowerUtil.h" -#include "llpcSpirvProcessGpuRtLibrary.h" #include "llpcThreading.h" #include "llpcTimerProfiler.h" #include "llpcUtil.h" @@ -80,6 +80,7 @@ // New version of the code (also handles unknown version, which we treat as latest) #include "llvm/IRPrinter/IRPrintingPasses.h" #endif +#include "continuations/GpurtContext.h" #include "llvm/Linker/Linker.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Format.h" @@ -236,7 +237,6 @@ struct HelperThreadBuildRayTracingPipelineElfPayload { Compiler *compiler; // The compiler instance std::atomic helperThreadJoined; // Whether helper thread has joined std::atomic mainThreadSwitchedContext; // Whether main thread has finished switching context - const bool useGpurt; // Whether any shader in the pipeline uses GPURT }; sys::Mutex Compiler::m_helperThreadMutex; @@ -1000,6 +1000,7 @@ void Compiler::buildShaderModuleResourceUsage( // Process image sampler in default uniform if (varElemTy->isTypeStruct()) { ResourceNodeData textureSymbol = {}; + textureSymbol.binding = defaultUniformSymbol.binding; textureSymbol.location = defaultUniformSymbol.location; textureSymbol.arraySize = getSamplerArraySizeInSpvStruct(varElemTy) * defaultUniformSymbol.arraySize; textureSymbol.isDefaultUniformSampler = true; @@ -1270,7 +1271,25 @@ Result Compiler::BuildColorExportShader(const GraphicsPipelineBuildInfo *pipelin bool hasError = false; 
context->setDiagnosticHandler(std::make_unique(&hasError)); std::unique_ptr elfLinker(pipeline->createElfLinker({})); - StringRef elfStr = elfLinker->buildColorExportShader(exports, fsOuts->discard); + StringRef hashStr = elfLinker->createColorExportShader(exports, fsOuts->discard); + + BinaryData elf = {}; + MetroHash::Hash colorExportCache = {}; + MetroHash64 cacheHasher; + cacheHasher.Update(reinterpret_cast(hashStr.data()), hashStr.size()); + cacheHasher.Finalize(colorExportCache.bytes); + CacheAccessor cacheAccessor(colorExportCache, getInternalCaches()); + if (cacheAccessor.isInCache()) { + LLPC_OUTS("Cache hit for color export shader.\n"); + elf = cacheAccessor.getElfFromCache(); + } else { + LLPC_OUTS("Cache miss for color export shader.\n"); + StringRef elfStr = elfLinker->compileGlue(0); + elf.pCode = elfStr.data(); + elf.codeSize = elfStr.size(); + cacheAccessor.setElfInCache(elf); + } + context->setDiagnosticHandler(nullptr); if (hasError) @@ -1278,17 +1297,17 @@ Result Compiler::BuildColorExportShader(const GraphicsPipelineBuildInfo *pipelin if (Llpc::EnableOuts()) { ElfReader reader(m_gfxIp); size_t readSize = 0; - if (reader.ReadFromBuffer(elfStr.data(), &readSize) == Result::Success) { + if (reader.ReadFromBuffer(elf.pCode, &readSize) == Result::Success) { LLPC_OUTS("===============================================================================\n"); LLPC_OUTS("// LLPC final color export shader ELF\n"); LLPC_OUTS(reader); } } - void *allocBuf = pipelineInfo->pfnOutputAlloc(pipelineInfo->pInstance, pipelineInfo->pUserData, elfStr.size()); + void *allocBuf = pipelineInfo->pfnOutputAlloc(pipelineInfo->pInstance, pipelineInfo->pUserData, elf.codeSize); uint8_t *code = static_cast(allocBuf); - memcpy(code, elfStr.data(), elfStr.size()); - pipelineOut->pipelineBin.codeSize = elfStr.size(); + memcpy(code, elf.pCode, elf.codeSize); + pipelineOut->pipelineBin.codeSize = elf.codeSize; pipelineOut->pipelineBin.pCode = code; return Result::Success; @@ -1744,9 +1763,9 @@ Result Compiler::buildPipelineInternal(Context *context, ArrayRef gpurtShaderLibrary = createGpurtShaderLibrary(context); - if (!gpurtShaderLibrary) - return Result::ErrorInvalidShader; + context->ensureGpurtLibrary(); + setUseGpurt(&*pipeline); + GpurtContext &gpurtContext = GpurtContext::get(*context); LLPC_OUTS("// LLPC link ray query modules"); @@ -1760,15 +1779,8 @@ Result Compiler::buildPipelineInternal(Context *context, ArrayRef localShaderLibrary; - if (numStagesWithRayQuery > 1) - localShaderLibrary = CloneModule(*gpurtShaderLibrary); - else - localShaderLibrary = std::move(gpurtShaderLibrary); - --numStagesWithRayQuery; - Linker linker(*modules[shaderIndex]); - if (linker.linkInModule(std::move(localShaderLibrary))) + if (linker.linkInModule(CloneModule(*gpurtContext.theModule))) result = Result::ErrorInvalidShader; } } @@ -1792,10 +1804,11 @@ Result Compiler::buildPipelineInternal(Context *context, ArrayRef(shaderInfoEntry->pModuleData); - - SpirvLower::addPasses(context, entryStage, *lowerPassMgr, timerProfiler.getTimer(TimerLower), - /*rayTracing=*/false, moduleData->usage.enableRayQuery, - moduleData->usage.isInternalRtShader); + LowerFlag flag = {}; + flag.isRayTracing = false; + flag.isRayQuery = moduleData->usage.enableRayQuery; + flag.isInternalRtShader = moduleData->usage.isInternalRtShader; + SpirvLower::addPasses(context, entryStage, *lowerPassMgr, timerProfiler.getTimer(TimerLower), flag); // Run the passes. 
bool success = runPasses(&*lowerPassMgr, modules[shaderIndex].get()); if (!success) { @@ -2443,63 +2456,6 @@ Result Compiler::BuildComputePipeline(const ComputePipelineBuildInfo *pipelineIn return Result::Success; } -// ===================================================================================================================== -// Load GPURT shader library indicated by the pipeline context and do initial pre-processing. -// -// @param context : the context -// @return the LLVM module containing the GPURT shader library -std::unique_ptr Compiler::createGpurtShaderLibrary(Context *context) { - const RtState *rtState = context->getPipelineContext()->getRayTracingState(); - - ShaderModuleData moduleData = {}; - moduleData.binCode = rtState->gpurtShaderLibrary; - moduleData.binType = BinaryType::Spirv; - moduleData.usage.keepUnusedFunctions = true; - moduleData.usage.rayQueryLibrary = true; - moduleData.usage.enableRayQuery = true; - - PipelineShaderInfo shaderInfo = {}; - shaderInfo.entryStage = ShaderStageCompute; - shaderInfo.pEntryTarget = Vkgc::getEntryPointNameFromSpirvBinary(&rtState->gpurtShaderLibrary); - shaderInfo.pModuleData = &moduleData; - - // Disable fast math contract on OpDot when there is no hardware intersectRay - bool hwIntersectRay = rtState->bvhResDesc.dataSizeInDwords > 0; - shaderInfo.options.noContractOpDot = !hwIntersectRay; - - auto module = std::make_unique(RtName::TraceRayKHR, *context); - context->setModuleTargetMachine(module.get()); - - TimerProfiler timerProfiler(context->getPipelineHashCode(), "LLPC", TimerProfiler::PipelineTimerEnableMask); - std::unique_ptr lowerPassMgr(lgc::PassManager::Create(context->getLgcContext())); - SpirvLower::registerTranslationPasses(*lowerPassMgr); - - timerProfiler.addTimerStartStopPass(*lowerPassMgr, TimerTranslate, true); - - // SPIR-V translation, then dump the result. - lowerPassMgr->addPass(SpirvLowerTranslator(ShaderStageCompute, &shaderInfo)); - if (EnableOuts()) { - lowerPassMgr->addPass( - PrintModulePass(outs(), "\n" - "===============================================================================\n" - "// LLPC SPIRV-to-LLVM translation results\n")); - } - - lowerPassMgr->addPass(SpirvProcessGpuRtLibrary()); - lowerPassMgr->addPass(SpirvLowerRayQuery(true)); - lowerPassMgr->addPass(AlwaysInlinerPass()); - // Stop timer for translate. - timerProfiler.addTimerStartStopPass(*lowerPassMgr, TimerTranslate, false); - - bool success = runPasses(&*lowerPassMgr, module.get()); - if (!success) { - LLPC_ERRS("Failed to translate SPIR-V or run per-shader passes\n"); - return {}; - } - - return module; -} - // ===================================================================================================================== // Build ray tracing pipeline from the specified info. 
// @@ -2830,8 +2786,8 @@ void helperThreadBuildRayTracingPipelineElf(IHelperThreadProvider *helperThreadP helperThreadPayload->rayTracingContext->setPipelineState(&*pipeline, /*hasher=*/nullptr, false); context->setBuilder(builderContext->createBuilder(&*pipeline)); - if (helperThreadPayload->useGpurt) - helperThreadPayload->compiler->setUseGpurt(&*pipeline); + context->ensureGpurtLibrary(); + helperThreadPayload->compiler->setUseGpurt(&*pipeline); TimerProfiler timerProfiler(context->getPipelineHashCode(), "LLPC", TimerProfiler::PipelineTimerEnableMask); @@ -2960,12 +2916,12 @@ Result Compiler::buildRayTracingPipelineInternal(RayTracingContext &rtContext, std::unique_ptr pipeline(builderContext->createPipeline()); rtContext.setPipelineState(&*pipeline, /*hasher=*/nullptr, unlinked); - bool needGpurtShaderLibrary = false; std::vector> modules(shaderInfo.size()); mainContext->setBuilder(builderContext->createBuilder(&*pipeline)); - const bool needGpurtForContinuations = (pipelineInfo->mode == Vkgc::LlpcRaytracingMode::Continuations); - if (needGpurtForContinuations) - needGpurtShaderLibrary = true; + const bool continuationsMode = (pipelineInfo->mode == Vkgc::LlpcRaytracingMode::Continuations); + bool needTraversal = false; + + mainContext->ensureGpurtLibrary(); // Create empty modules and set target machine in each. for (unsigned shaderIndex = 0; shaderIndex < shaderInfo.size(); ++shaderIndex) { @@ -2987,7 +2943,7 @@ Result Compiler::buildRayTracingPipelineInternal(RayTracingContext &rtContext, const ShaderModuleData *moduleData = reinterpret_cast(shaderInfoEntry->pModuleData); if (moduleData->usage.enableRayQuery || moduleData->usage.hasTraceRay) - needGpurtShaderLibrary = true; + needTraversal = true; std::unique_ptr lowerPassMgr(lgc::PassManager::Create(builderContext)); lowerPassMgr->setPassIndex(&passIndex); @@ -3014,13 +2970,8 @@ Result Compiler::buildRayTracingPipelineInternal(RayTracingContext &rtContext, // Record which module calls TraceRay(), except the first one (For indirect mode, it is the entry function which will // never call TraceRay(). For inlined mode, we don't need to care). std::vector moduleCallsTraceRay; - std::unique_ptr gpurtShaderLibrary; - if (needGpurtShaderLibrary) { - gpurtShaderLibrary = createGpurtShaderLibrary(mainContext); - setUseGpurt(&*pipeline); - if (!gpurtShaderLibrary) - return Result::ErrorInvalidShader; - } + setUseGpurt(&*pipeline); + GpurtContext &gpurtContext = GpurtContext::get(*mainContext); // Can currently only support all-or-nothing indirect for various reasons, the most important one being that the // Vulkan driver's shader group handle construction logic assume that if any shader identifier uses a VA mapping, then @@ -3040,9 +2991,9 @@ Result Compiler::buildRayTracingPipelineInternal(RayTracingContext &rtContext, const ShaderModuleData *moduleData = reinterpret_cast(shaderInfoEntry->pModuleData); auto shaderModule = std::move(modules[shaderIndex]); - if (moduleData->usage.enableRayQuery || needGpurtForContinuations) { + if (moduleData->usage.enableRayQuery) { Linker linker(*shaderModule); - if (linker.linkInModule(CloneModule(*gpurtShaderLibrary))) + if (linker.linkInModule(CloneModule(*gpurtContext.theModule))) return Result::ErrorInvalidShader; } @@ -3053,30 +3004,36 @@ Result Compiler::buildRayTracingPipelineInternal(RayTracingContext &rtContext, // TODO: For continuations, we only need to compile the GpuRt module separately if there are TraceRay usages // to compile the Traversal shader. For callable shaders, it is not required. 
- if (gpurtShaderLibrary) { + if (needTraversal) { StringRef traceRayFuncName = mainContext->getPipelineContext()->getRayTracingFunctionName(Vkgc::RT_ENTRY_TRACE_RAY); StringRef fetchTrianglePosFunc = mainContext->getPipelineContext()->getRayTracingFunctionName( Vkgc::RT_ENTRY_FETCH_HIT_TRIANGLE_FROM_NODE_POINTER); + std::unique_ptr traversal = CloneModule(*gpurtContext.theModule); + // Prepare GpuRt module to be compiled separately - for (auto funcIt = gpurtShaderLibrary->begin(), funcEnd = gpurtShaderLibrary->end(); funcIt != funcEnd;) { + for (auto funcIt = traversal->begin(), funcEnd = traversal->end(); funcIt != funcEnd;) { Function *func = &*funcIt++; if (func->getName().starts_with(traceRayFuncName)) { // We assigned GpuRt functions weak linkage prior to linking into app modules to not confuse the entry // point determination mechanism. Undo that on TraceRay to make it the entry of the module. func->setLinkage(GlobalValue::ExternalLinkage); lgc::rt::setLgcRtShaderStage(func, lgc::rt::RayTracingShaderStage::Traversal); - } else if (func->getLinkage() == GlobalValue::WeakAnyLinkage && - !func->getName().starts_with(fetchTrianglePosFunc) && !func->empty()) { + } else if (func->getLinkage() == GlobalValue::WeakAnyLinkage && !func->empty()) { // Preserve fetchTrianglePosFunc because we need to inline it into Traversal later on. // Remove other function definitions both for compile speed, and to work around an // issue with private globals used in multiple functions in GpuRt which confuses SpirvLowerGlobal. - func->dropAllReferences(); - func->eraseFromParent(); + bool isFetchTrianglePosFunc = func->getName().starts_with(fetchTrianglePosFunc); + bool isContinuationFunc = continuationsMode && func->getName().starts_with("_cont_"); + + if (!isFetchTrianglePosFunc && !isContinuationFunc) { + func->dropAllReferences(); + func->eraseFromParent(); + } } } - newModules.push_back(std::move(gpurtShaderLibrary)); + newModules.push_back(std::move(traversal)); moduleCallsTraceRay.push_back(false); moduleUsesRayQuery.push_back(false); } @@ -3088,8 +3045,14 @@ Result Compiler::buildRayTracingPipelineInternal(RayTracingContext &rtContext, auto module = (newModules[i].get()); std::unique_ptr passMgr(lgc::PassManager::Create(builderContext)); SpirvLower::registerLoweringPasses(*passMgr); - SpirvLower::addPasses(mainContext, ShaderStageCompute, *passMgr, timerProfiler.getTimer(TimerLower), true, - moduleUsesRayQuery[i], false); + LowerFlag flag = {}; + flag.isRayTracing = true; + flag.isRayQuery = moduleUsesRayQuery[i]; + flag.isInternalRtShader = false; + SpirvLower::addPasses(mainContext, ShaderStageCompute, *passMgr, timerProfiler.getTimer(TimerLower), flag); + if (continuationsMode) { + passMgr->addPass(PrepareContinuations()); + } bool success = runPasses(&*passMgr, module); if (!success) { LLPC_ERRS("Failed to translate SPIR-V or run per-shader passes\n"); @@ -3128,8 +3091,7 @@ Result Compiler::buildRayTracingPipelineInternal(RayTracingContext &rtContext, for (const auto &module : newModules) modulePointers.push_back(module.get()); HelperThreadBuildRayTracingPipelineElfPayload helperThreadPayload = { - modulePointers, pipelineElfs, shaderProps, moduleCallsTraceRay, results, &rtContext, - this, false, false, needGpurtShaderLibrary}; + modulePointers, pipelineElfs, shaderProps, moduleCallsTraceRay, results, &rtContext, this, false, false}; helperThreadProvider->SetTasks(&helperThreadBuildRayTracingPipelineElf, newModules.size(), static_cast(&helperThreadPayload)); diff --git 
a/llpc/context/llpcCompiler.h b/llpc/context/llpcCompiler.h index 0e39d3c590..8c10ca3249 100644 --- a/llpc/context/llpcCompiler.h +++ b/llpc/context/llpcCompiler.h @@ -187,7 +187,6 @@ class Compiler : public ICompiler { bool canUseRelocatableGraphicsShaderElf(const llvm::ArrayRef &shaderInfo, const GraphicsPipelineBuildInfo *pipelineInfo); bool canUseRelocatableComputeShaderElf(const ComputePipelineBuildInfo *pipelineInfo); - std::unique_ptr createGpurtShaderLibrary(Context *context); Result buildRayTracingPipelineInternal(RayTracingContext &rtContext, llvm::ArrayRef shaderInfo, bool unlinked, std::vector &pipelineElfs, diff --git a/llpc/context/llpcComputeContext.cpp b/llpc/context/llpcComputeContext.cpp index c2a6b9b9f0..dccd98fc41 100644 --- a/llpc/context/llpcComputeContext.cpp +++ b/llpc/context/llpcComputeContext.cpp @@ -56,6 +56,7 @@ ComputeContext::ComputeContext(GfxIpVersion gfxIp, const ComputePipelineBuildInf setUnlinked(pipelineInfo->unlinked); m_resourceMapping = pipelineInfo->resourceMapping; m_pipelineLayoutApiHash = pipelineInfo->pipelineLayoutApiHash; + m_pipelineApiHash = pipelineInfo->pipelineApiHash; } // ===================================================================================================================== diff --git a/llpc/context/llpcContext.cpp b/llpc/context/llpcContext.cpp index ec5f7946fb..70e1cb2af7 100644 --- a/llpc/context/llpcContext.cpp +++ b/llpc/context/llpcContext.cpp @@ -31,9 +31,18 @@ #include "llpcContext.h" #include "SPIRVInternal.h" #include "continuations/ContinuationsDialect.h" +#include "continuations/GpurtContext.h" #include "llpcCompiler.h" #include "llpcDebug.h" #include "llpcPipelineContext.h" +#include "llpcSpirvLower.h" +#include "llpcSpirvLowerAccessChain.h" +#include "llpcSpirvLowerCfgMerges.h" +#include "llpcSpirvLowerGlobal.h" +#include "llpcSpirvLowerRayQuery.h" +#include "llpcSpirvLowerTranslator.h" +#include "llpcSpirvProcessGpuRtLibrary.h" +#include "llpcTimerProfiler.h" #include "vkgcMetroHash.h" #include "lgc/Builder.h" #include "lgc/GpurtDialect.h" @@ -41,18 +50,22 @@ #include "lgc/LgcCpsDialect.h" #include "lgc/LgcDialect.h" #include "lgc/LgcRtDialect.h" +#include "lgc/PassManager.h" #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/Bitcode/BitcodeWriter.h" #include "llvm/Bitstream/BitstreamReader.h" #include "llvm/Bitstream/BitstreamWriter.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" +#include "llvm/IRPrinter/IRPrintingPasses.h" #include "llvm/Linker/Linker.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/AlwaysInliner.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Cloning.h" @@ -99,6 +112,7 @@ LgcContext *Context::getLgcContext() { if (!m_targetMachine) report_fatal_error(Twine("Unknown target '") + Twine(gpuName) + Twine("'")); m_builderContext.reset(LgcContext::create(&*m_targetMachine, *this, PAL_CLIENT_INTERFACE_MAJOR_VERSION)); + lgc::GpurtContext::get(*this).theModule.reset(); // Pass the state of LLPC_OUTS on to LGC. LgcContext::setLlpcOuts(EnableOuts() ? 
&outs() : nullptr); @@ -195,4 +209,68 @@ void Context::setModuleTargetMachine(Module *module) { module->setDataLayout(dataLayoutStr); } +// ===================================================================================================================== +// Ensure that a compatible GPURT library module is attached to this context via GpurtContext. +void Context::ensureGpurtLibrary() { + // Check whether we already have a GPURT library module that can be used + const Vkgc::RtState *rtState = getPipelineContext()->getRayTracingState(); + auto &gpurtContext = lgc::GpurtContext::get(*this); + GpurtKey key = {}; + key.gpurtFeatureFlags = rtState->gpurtFeatureFlags; // gpurtFeatureFlags affect which GPURT library we're using + key.hwIntersectRay = rtState->bvhResDesc.dataSizeInDwords > 0; + + if (gpurtContext.theModule && key != m_currentGpurtKey) + gpurtContext.theModule.reset(); + + if (gpurtContext.theModule) + return; + + // Create the GPURT library module + m_currentGpurtKey = key; + + ShaderModuleData moduleData = {}; + moduleData.binCode = rtState->gpurtShaderLibrary; + moduleData.binType = BinaryType::Spirv; + moduleData.usage.keepUnusedFunctions = true; + moduleData.usage.rayQueryLibrary = true; + moduleData.usage.enableRayQuery = true; + + PipelineShaderInfo shaderInfo = {}; + shaderInfo.entryStage = ShaderStageCompute; + shaderInfo.pEntryTarget = Vkgc::getEntryPointNameFromSpirvBinary(&rtState->gpurtShaderLibrary); + shaderInfo.pModuleData = &moduleData; + + // Disable fast math contract on OpDot when there is no hardware intersectRay + shaderInfo.options.noContractOpDot = !key.hwIntersectRay; + + auto gpurt = std::make_unique("_cs_", *this); + setModuleTargetMachine(gpurt.get()); + + TimerProfiler timerProfiler(getPipelineHashCode(), "LLPC GPURT", TimerProfiler::PipelineTimerEnableMask); + std::unique_ptr lowerPassMgr(lgc::PassManager::Create(getLgcContext())); + SpirvLower::registerTranslationPasses(*lowerPassMgr); + + timerProfiler.addTimerStartStopPass(*lowerPassMgr, TimerTranslate, true); + + lowerPassMgr->addPass(SpirvLowerTranslator(ShaderStageCompute, &shaderInfo, "_gpurtvar_")); + if (EnableOuts()) { + lowerPassMgr->addPass( + PrintModulePass(outs(), "\n" + "===============================================================================\n" + "// LLPC SPIRV-to-LLVM translation results\n")); + } + + lowerPassMgr->addPass(SpirvLowerCfgMerges()); + lowerPassMgr->addPass(SpirvProcessGpuRtLibrary()); + lowerPassMgr->addPass(SpirvLowerRayQuery(true)); + lowerPassMgr->addPass(AlwaysInlinerPass()); + lowerPassMgr->addPass(SpirvLowerAccessChain()); + lowerPassMgr->addPass(SpirvLowerGlobal()); + timerProfiler.addTimerStartStopPass(*lowerPassMgr, TimerTranslate, false); + + lowerPassMgr->run(*gpurt); + + gpurtContext.theModule = std::move(gpurt); +} + } // namespace Llpc diff --git a/llpc/context/llpcContext.h b/llpc/context/llpcContext.h index 992c2d5a6e..f8ac03cbe5 100644 --- a/llpc/context/llpcContext.h +++ b/llpc/context/llpcContext.h @@ -109,8 +109,6 @@ class Context : public llvm::LLVMContext { unsigned getActiveShaderStageCount() const { return m_pipelineContext->getActiveShaderStageCount(); } - const char *getGpuNameAbbreviation() const { return PipelineContext::getGpuNameAbbreviation(m_gfxIp); } - GfxIpVersion getGfxIpVersion() const { return m_gfxIp; } uint64_t getPipelineHashCode() const { return m_pipelineContext->getPipelineHashCode(); } @@ -124,6 +122,8 @@ class Context : public llvm::LLVMContext { // Sets triple and data layout in specified module from the context's 
target machine. void setModuleTargetMachine(llvm::Module *module); + void ensureGpurtLibrary(); + private: Context() = delete; Context(const Context &) = delete; @@ -147,6 +147,18 @@ class Context : public llvm::LLVMContext { std::unique_ptr m_dialectContext; unsigned m_useCount = 0; // Number of times this context is used. + + struct GpurtKey { + unsigned gpurtFeatureFlags; + bool hwIntersectRay; + + bool operator==(const GpurtKey &other) const { + return gpurtFeatureFlags == other.gpurtFeatureFlags && hwIntersectRay == other.hwIntersectRay; + } + bool operator!=(const GpurtKey &other) const { return !(*this == other); } + }; + + GpurtKey m_currentGpurtKey; }; } // namespace Llpc diff --git a/llpc/context/llpcGraphicsContext.cpp b/llpc/context/llpcGraphicsContext.cpp index 1839b03d41..58480e7cfb 100644 --- a/llpc/context/llpcGraphicsContext.cpp +++ b/llpc/context/llpcGraphicsContext.cpp @@ -44,9 +44,6 @@ using namespace Vkgc; namespace Llpc { -// -disable-fetch-shader: disable the fetch shader when doing unlinked shaders. -static cl::opt DisableFetchShader("disable-fetch-shader", cl::desc("Disable fetch shaders"), cl::init(false)); - // -disable-color-export-shader: disable the color export shader when doing unlinked shaders. static cl::opt DisableColorExportShader("disable-color-export-shader", cl::desc("Disable color export shaders"), cl::init(false)); @@ -88,6 +85,7 @@ GraphicsContext::GraphicsContext(GfxIpVersion gfxIp, const GraphicsPipelineBuild m_resourceMapping = pipelineInfo->resourceMapping; m_pipelineLayoutApiHash = pipelineInfo->pipelineLayoutApiHash; + m_pipelineApiHash = pipelineInfo->pipelineApiHash; } // ===================================================================================================================== @@ -192,7 +190,7 @@ void GraphicsContext::setPipelineState(Pipeline *pipeline, Util::MetroHash64 *ha } } - if ((stageMask & ~shaderStageToMask(ShaderStageFragment)) && (!unlinked || DisableFetchShader)) { + if ((stageMask & ~shaderStageToMask(ShaderStageFragment))) { // Set vertex input descriptions to the middle-end. setVertexInputDescriptions(pipeline, hasher); } diff --git a/llpc/context/llpcPipelineContext.cpp b/llpc/context/llpcPipelineContext.cpp index 888afca8a6..c910569236 100644 --- a/llpc/context/llpcPipelineContext.cpp +++ b/llpc/context/llpcPipelineContext.cpp @@ -148,33 +148,6 @@ PipelineContext::PipelineContext(GfxIpVersion gfxIp, MetroHash::Hash *pipelineHa PipelineContext::~PipelineContext() { } -// ===================================================================================================================== -// Gets the name string of the abbreviation for GPU target according to graphics IP version info. -// -// @param gfxIp : Graphics IP version info -const char *PipelineContext::getGpuNameAbbreviation(GfxIpVersion gfxIp) { - const char *nameAbbr = nullptr; - switch (gfxIp.major) { - case 6: - nameAbbr = "SI"; - break; - case 7: - nameAbbr = "CI"; - break; - case 8: - nameAbbr = "VI"; - break; - case 9: - nameAbbr = "GFX9"; - break; - default: - nameAbbr = "UNKNOWN"; - break; - } - - return nameAbbr; -} - // ===================================================================================================================== // Gets the hash code of input shader with specified shader stage. 
// @@ -742,6 +715,14 @@ ShaderOptions PipelineContext::computeShaderOptions(const PipelineShaderInfo &sh return shaderOptions; } +// ===================================================================================================================== +// Gets pipeline hash code. If pipeline api hash is non-zero, this will be used directly as the "stable" hash. +uint64_t PipelineContext::getPipelineHashCode() const { + if (m_pipelineApiHash != 0) + return m_pipelineApiHash; + return MetroHash::compact64(&m_pipelineHash); +} + // ===================================================================================================================== // Get wave size used for raytracing unsigned PipelineContext::getRayTracingWaveSize() const { diff --git a/llpc/context/llpcPipelineContext.h b/llpc/context/llpcPipelineContext.h index cd51374f20..b00ed8a898 100644 --- a/llpc/context/llpcPipelineContext.h +++ b/llpc/context/llpcPipelineContext.h @@ -166,13 +166,11 @@ class PipelineContext { virtual void collectAttributeDataSize(llvm::Type *type, const llvm::DataLayout &dataLayout) {} virtual void collectBuiltIn(unsigned builtIn) {} - static const char *getGpuNameAbbreviation(GfxIpVersion gfxIp); - // Gets graphics IP version info GfxIpVersion getGfxIpVersion() const { return m_gfxIp; } - // Gets pipeline hash code compacted to 64-bits. - uint64_t getPipelineHashCode() const { return MetroHash::compact64(&m_pipelineHash); } + // Gets pipeline hash code. + uint64_t getPipelineHashCode() const; // Gets cache hash code compacted to 64-bits. uint64_t get64BitCacheHashCode() const { return MetroHash::compact64(&m_cacheHash); } @@ -250,6 +248,7 @@ class PipelineContext { MetroHash::Hash m_cacheHash; // Cache hash code ResourceMappingData m_resourceMapping; // Contains resource mapping nodes and static descriptor values uint64_t m_pipelineLayoutApiHash; // Pipeline Layout Api Hash + uint64_t m_pipelineApiHash; // Pipeline Api Hash. private: PipelineContext() = delete; diff --git a/llpc/context/llpcRayTracingContext.cpp b/llpc/context/llpcRayTracingContext.cpp index 61be633825..bc552ccf20 100644 --- a/llpc/context/llpcRayTracingContext.cpp +++ b/llpc/context/llpcRayTracingContext.cpp @@ -58,6 +58,7 @@ RayTracingContext::RayTracingContext(GfxIpVersion gfxIP, const RayTracingPipelin m_resourceMapping = pipelineInfo->resourceMapping; m_pipelineLayoutApiHash = pipelineInfo->pipelineLayoutApiHash; + m_pipelineApiHash = pipelineInfo->pipelineApiHash; if (representativeShaderInfo) m_representativeShaderInfo.options = representativeShaderInfo->options; diff --git a/llpc/docs/amdllpc.md b/llpc/docs/amdllpc.md index 6828f91285..fcc9c1e296 100644 --- a/llpc/docs/amdllpc.md +++ b/llpc/docs/amdllpc.md @@ -80,7 +80,6 @@ amdllpc [...] [...] 
| `-auto-layout-desc` | Automatically create descriptor layout based on resource usages | false for .pipe files, true for individual shaders | | `-robust-buffer-access` | Validate if buffer index is out of bounds | false | | `-enable-relocatable-shader-elf` | Compile pipelines using relocatable shader elf | false | -| `-enable-scratch-bounds-checks` | Insert scratch access bounds checks on loads and stores | false | | `-scalar-block-layout` | Allow scalar block layout of types | false | * Dump options diff --git a/llpc/lower/PrepareContinuations.cpp b/llpc/lower/PrepareContinuations.cpp new file mode 100644 index 0000000000..8f03bc97c2 --- /dev/null +++ b/llpc/lower/PrepareContinuations.cpp @@ -0,0 +1,80 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ +/** + *********************************************************************************************************************** + * @file PrepareContinuations.cpp + * @brief LLPC source file: contains implementation of class Llpc::PrepareContinuations. + *********************************************************************************************************************** + */ +#include "PrepareContinuations.h" +#include "compilerutils/CompilerUtils.h" +#include "continuations/GpurtContext.h" +#include "llpcContext.h" +#include "lgc/Builder.h" + +#define DEBUG_TYPE "prepare-continuations" +using namespace lgc; +using namespace llvm; +using namespace lgc::rt; +using namespace CompilerUtils; + +namespace Llpc { +PrepareContinuations::PrepareContinuations() { +} + +// ===================================================================================================================== +// Executes this SPIR-V lowering pass on the specified LLVM module. 
+// +// @param [in/out] module : LLVM module to be run on +// @param [in/out] analysisManager : Analysis manager to use for this transformation +PreservedAnalyses PrepareContinuations::run(Module &module, ModuleAnalysisManager &analysisManager) { + LLVM_DEBUG(dbgs() << "Run the pass PrepareContinuations\n"); + SpirvLower::init(&module); + const auto *rtState = m_context->getPipelineContext()->getRayTracingState(); + ComputeShaderMode mode = {}; + mode.workgroupSizeX = rtState->threadGroupSizeX; + mode.workgroupSizeY = rtState->threadGroupSizeY; + mode.workgroupSizeZ = rtState->threadGroupSizeZ; + Pipeline::setComputeShaderMode(module, mode); + + if (module.getName().starts_with("main")) { + m_shaderStage = ShaderStageRayTracingRayGen; + auto FuncTy = FunctionType::get(m_builder->getVoidTy(), {}, false); + GpurtContext &gpurtContext = GpurtContext::get(*m_context); + Function *contKernel = gpurtContext.theModule->getFunction("_cont_KernelEntry"); + Function *entryFunc = Function::Create(FuncTy, GlobalValue::ExternalLinkage, "main", module); + auto *bb = BasicBlock::Create(entryFunc->getContext(), "entry", entryFunc); + CrossModuleInliner inliner; + IRBuilder<> builder(bb); + builder.SetInsertPoint(builder.CreateRetVoid()); + inliner.inlineCall(builder, contKernel, {}); + setLgcRtShaderStage(entryFunc, RayTracingShaderStage::KernelEntry); + lgc::Pipeline::markShaderEntryPoint(entryFunc, lgc::ShaderStage::Compute); + } + + return PreservedAnalyses::none(); +} + +} // namespace Llpc diff --git a/llpc/lower/PrepareContinuations.h b/llpc/lower/PrepareContinuations.h new file mode 100644 index 0000000000..9df5e60f46 --- /dev/null +++ b/llpc/lower/PrepareContinuations.h @@ -0,0 +1,44 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ +/** + *********************************************************************************************************************** + * @file PrepareContinuations.h + * @brief LLPC header file: contains declaration of Llpc::PrepareContinuations, this class is the pass before + * LowerRayTracingPipeline and is mostly used to integrate the KernelEntry function from gpurt module + * + *********************************************************************************************************************** + */ +#pragma once + +#include "llpcSpirvLower.h" +#include "llvm/IR/PassManager.h" + +namespace Llpc { +class PrepareContinuations : public SpirvLower, public llvm::PassInfoMixin { +public: + PrepareContinuations(); + llvm::PreservedAnalyses run(llvm::Module &module, llvm::ModuleAnalysisManager &analysisManager); +}; +} // namespace Llpc diff --git a/llpc/lower/llpcSpirvLower.cpp b/llpc/lower/llpcSpirvLower.cpp index 2dbc5697ad..0c7501e102 100644 --- a/llpc/lower/llpcSpirvLower.cpp +++ b/llpc/lower/llpcSpirvLower.cpp @@ -172,16 +172,14 @@ void SpirvLower::removeConstantExpr(Context *context, GlobalVariable *global) { // @param stage : Shader stage // @param [in/out] passMgr : Pass manager to add passes to // @param lowerTimer : Timer to time lower passes with, nullptr if not timing -// @param rayTracing : Whether we are lowering a ray tracing pipeline shader -// @param rayQuery : Whether we are lowering a ray query library -// @param isInternalRtShader : Whether we are lowering an internal ray tracing shader +// @param lowerFlag : Add the required pass based on the flag void SpirvLower::addPasses(Context *context, ShaderStage stage, lgc::PassManager &passMgr, Timer *lowerTimer, - bool rayTracing, bool rayQuery, bool isInternalRtShader) { + LowerFlag lowerFlag) { // Start timer for lowering passes. if (lowerTimer) LgcContext::createAndAddStartStopTimer(passMgr, lowerTimer, true); - if (isInternalRtShader) + if (lowerFlag.isInternalRtShader) passMgr.addPass(SpirvProcessGpuRtLibrary()); // Lower SPIR-V CFG merges before inlining @@ -195,7 +193,7 @@ void SpirvLower::addPasses(Context *context, ShaderStage stage, lgc::PassManager // Lower SPIR-V access chain passMgr.addPass(SpirvLowerAccessChain()); - if (rayQuery) + if (lowerFlag.isRayQuery) passMgr.addPass(SpirvLowerRayQueryPostInline()); // Lower SPIR-V terminators @@ -257,7 +255,7 @@ void SpirvLower::addPasses(Context *context, ShaderStage stage, lgc::PassManager // Lower SPIR-V ray tracing related stuff, including entry point generation, lgc.rt dialect handling, some of // lgc.gpurt dialect handling. // And do inlining after SpirvLowerRayTracing as it will produce some extra functions. 
- if (rayTracing) { + if (lowerFlag.isRayTracing) { assert(context->getPipelineType() == PipelineType::RayTracing); auto *pipelineInfo = static_cast(context->getPipelineBuildInfo()); if (pipelineInfo->mode != Vkgc::LlpcRaytracingMode::Continuations) { @@ -266,7 +264,7 @@ void SpirvLower::addPasses(Context *context, ShaderStage stage, lgc::PassManager } } - if (rayTracing || rayQuery || isInternalRtShader) { + if (lowerFlag.isRayTracing || lowerFlag.isRayQuery || lowerFlag.isInternalRtShader) { FunctionPassManager fpm; fpm.addPass(SROAPass(SROAOptions::PreserveCFG)); fpm.addPass(InstCombinePass(instCombineOpt)); diff --git a/llpc/lower/llpcSpirvLower.h b/llpc/lower/llpcSpirvLower.h index c2e866df35..947124a04e 100644 --- a/llpc/lower/llpcSpirvLower.h +++ b/llpc/lower/llpcSpirvLower.h @@ -52,6 +52,16 @@ namespace Llpc { class Context; +union LowerFlag { + struct { + unsigned isRayTracing : 1; // Whether we are lowering a ray tracing pipeline shader + unsigned isRayQuery : 1; // Whether we are lowering a ray query library + unsigned isInternalRtShader : 1; // Whether we are lowering an internal ray tracing shader + unsigned reserved : 29; + }; + unsigned u32All; +}; + // ===================================================================================================================== // Represents the pass of SPIR-V lowering operations, as the base class. class SpirvLower { @@ -60,7 +70,7 @@ class SpirvLower { // Add per-shader lowering passes to pass manager static void addPasses(Context *context, ShaderStage stage, lgc::PassManager &passMgr, llvm::Timer *lowerTimer, - bool rayTracing, bool rayQuery, bool isInternalRtShader); + LowerFlag lowerFlag); // Register all the translation passes into the given pass manager static void registerTranslationPasses(lgc::PassManager &passMgr); // Register all the lowering passes into the given pass manager diff --git a/llpc/lower/llpcSpirvLowerAccessChain.cpp b/llpc/lower/llpcSpirvLowerAccessChain.cpp index 5036f55d22..1221a407c4 100644 --- a/llpc/lower/llpcSpirvLowerAccessChain.cpp +++ b/llpc/lower/llpcSpirvLowerAccessChain.cpp @@ -50,15 +50,6 @@ namespace Llpc { // @param [in/out] module : LLVM module to be run on // @param [in/out] analysisManager : Analysis manager to use for this transformation PreservedAnalyses SpirvLowerAccessChain::run(Module &module, ModuleAnalysisManager &analysisManager) { - runImpl(module); - return PreservedAnalyses::none(); -} - -// ===================================================================================================================== -// Executes this SPIR-V lowering pass on the specified LLVM module. 
-// -// @param [in/out] module : LLVM module to be run on -bool SpirvLowerAccessChain::runImpl(Module &module) { LLVM_DEBUG(dbgs() << "Run the pass Spirv-Lower-Access-Chain\n"); SpirvLower::init(&module); @@ -66,7 +57,7 @@ bool SpirvLowerAccessChain::runImpl(Module &module) { // Invoke handling of "getelementptr", "load" and "store" instructions visit(m_module); - return true; + return PreservedAnalyses::none(); } // ===================================================================================================================== diff --git a/llpc/lower/llpcSpirvLowerAccessChain.h b/llpc/lower/llpcSpirvLowerAccessChain.h index fde884b00c..c8f91148c4 100644 --- a/llpc/lower/llpcSpirvLowerAccessChain.h +++ b/llpc/lower/llpcSpirvLowerAccessChain.h @@ -48,8 +48,6 @@ class SpirvLowerAccessChain : public SpirvLower, virtual void visitLoadInst(llvm::LoadInst &loadInst); virtual void visitStoreInst(llvm::StoreInst &storeInst); - bool runImpl(llvm::Module &module); - static llvm::StringRef name() { return "Lower SPIR-V access chain"; } private: diff --git a/llpc/lower/llpcSpirvLowerCfgMerges.cpp b/llpc/lower/llpcSpirvLowerCfgMerges.cpp index 94a82b6ebc..1b861dd0ff 100644 --- a/llpc/lower/llpcSpirvLowerCfgMerges.cpp +++ b/llpc/lower/llpcSpirvLowerCfgMerges.cpp @@ -42,6 +42,7 @@ #include "llpcSpirvLower.h" #include "llpcSpirvLowerUtil.h" #include "lgc/Builder.h" +#include "lgc/LgcDialect.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" @@ -64,22 +65,12 @@ using namespace SPIRV; using namespace Llpc; // -enable-loop-reconvergence: force enable loop reconvergence transform -static cl::opt<bool> EnableLoopReconvergence("enable-loop-reconvergence", - cl::desc("Force enable loop reconvergence transform"), cl::init(false)); +static cl::opt<bool> ForceEnableLoopReconvergence("enable-loop-reconvergence", + cl::desc("Force enable loop reconvergence transform"), + cl::init(false)); namespace Llpc { -// ===================================================================================================================== -// Executes this SPIR-V lowering pass on the specified LLVM module. -// -// @param [in/out] module : LLVM module to be run on (empty on entry) -// @param [in/out] analysisManager : Analysis manager to use for this transformation -PreservedAnalyses SpirvLowerCfgMerges::run(Module &module, ModuleAnalysisManager &analysisManager) { - bool changed = runImpl(module); - // In practice there are unlikely to be any analyses this early, but report accurate status anyway. - return changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); -} - /// Defines helper for print block and function identifiers during debugging class OpPrinter { BasicBlock *m_block; @@ -311,8 +302,9 @@ void SpirvLowerCfgMerges::mapConvergentValues(Module &module) { // ===================================================================================================================== // Executes this SPIR-V lowering pass on the specified LLVM module.
// -// @param [in/out] module : LLVM module to be run on -bool SpirvLowerCfgMerges::runImpl(Module &module) { +// @param [in/out] module : LLVM module to be run on (empty on entry) +// @param [in/out] analysisManager : Analysis manager to use for this transformation +PreservedAnalyses SpirvLowerCfgMerges::run(Module &module, ModuleAnalysisManager &analysisManager) { LLVM_DEBUG(dbgs() << "Run the pass Spirv-Lower-CfgMerges\n"); LLVM_DEBUG(dbgs() << "Processing module: " << module); @@ -321,7 +313,24 @@ bool SpirvLowerCfgMerges::runImpl(Module &module) { // Check for loops Function *loopMergeFunc = module.getFunction("spirv.loop.merge"); if (!loopMergeFunc) - return false; + return PreservedAnalyses::all(); + + bool requiresReconvergence = ForceEnableLoopReconvergence; + + // Enable the transform if any function requires maximally reconvergent SPIR-V. + // Ideally we might restrict this to only the applicable functions; however, + // as this pass runs before inlining, that would require computing the whole call graph. + // Regardless, the transform is only ever applied to loops with convergent operations. + for (Function &F : module) { + MDNode *metaNode = F.getMetadata(gSPIRVMD::MaximallyReconverges); + if (!metaNode) + continue; + auto flagValue = mdconst::dyn_extract<ConstantInt>(metaNode->getOperand(0)); + if (flagValue == ConstantInt::getTrue(*m_context)) { + requiresReconvergence = true; + break; + } + } // Map convergent values m_convergentValues.clear(); @@ -342,7 +351,7 @@ bool SpirvLowerCfgMerges::runImpl(Module &module) { // Note: this visit blocks deterministically and loop headers from outer loops before inner ones bool hasConvergentLoops = false; bool changed = false; - bool valid = EnableLoopReconvergence && !m_convergentValues.empty(); + bool valid = requiresReconvergence && !m_convergentValues.empty(); for (Function &F : module) { if (F.empty()) @@ -376,7 +385,7 @@ bool SpirvLowerCfgMerges::runImpl(Module &module) { if (!changed || !valid || !hasConvergentLoops) { m_convergentValues.clear(); - return changed ?
PreservedAnalyses::none() : PreservedAnalyses::all(); } // Output debug information before changing IR structure @@ -582,7 +591,7 @@ bool SpirvLowerCfgMerges::runImpl(Module &module) { // Determine if any lanes continue Value *notBreakPhi = BinaryOperator::CreateNot(breakPhi, "", loop->sigmaBlock); m_builder->SetInsertPoint(loop->sigmaBlock); - Value *anyContinue = m_builder->CreateSubgroupAny(notBreakPhi); + Value *anyContinue = m_builder->create<lgc::SubgroupAnyOp>(notBreakPhi); // Connect sigma block to wave header BranchInst *loopEnd = BranchInst::Create(waveHeader, postSigmaBlock, anyContinue, loop->sigmaBlock); @@ -644,7 +653,7 @@ bool SpirvLowerCfgMerges::runImpl(Module &module) { } m_convergentValues.clear(); - return true; + return PreservedAnalyses::none(); } } // namespace Llpc diff --git a/llpc/lower/llpcSpirvLowerCfgMerges.h b/llpc/lower/llpcSpirvLowerCfgMerges.h index 39848809d2..b4549166db 100644 --- a/llpc/lower/llpcSpirvLowerCfgMerges.h +++ b/llpc/lower/llpcSpirvLowerCfgMerges.h @@ -40,7 +40,6 @@ namespace Llpc { class SpirvLowerCfgMerges : public SpirvLower, public llvm::PassInfoMixin<SpirvLowerCfgMerges> { public: llvm::PreservedAnalyses run(llvm::Module &module, llvm::ModuleAnalysisManager &analysisManager); - bool runImpl(llvm::Module &module); void mapConvergentValues(llvm::Module &module); diff --git a/llpc/lower/llpcSpirvLowerGlobal.cpp b/llpc/lower/llpcSpirvLowerGlobal.cpp index 10bc5d68d3..39ce3d3f8d 100644 --- a/llpc/lower/llpcSpirvLowerGlobal.cpp +++ b/llpc/lower/llpcSpirvLowerGlobal.cpp @@ -203,15 +203,6 @@ SpirvLowerGlobal::SpirvLowerGlobal() // @param [in/out] module : LLVM module to be run on (empty on entry) // @param [in/out] analysisManager : Analysis manager to use for this transformation PreservedAnalyses SpirvLowerGlobal::run(Module &module, ModuleAnalysisManager &analysisManager) { - runImpl(module); - return PreservedAnalyses::none(); -} - -// ===================================================================================================================== -// Executes this SPIR-V lowering pass on the specified LLVM module. -// -// @param [in/out] module : LLVM module to be run on -bool SpirvLowerGlobal::runImpl(Module &module) { LLVM_DEBUG(dbgs() << "Run the pass Spirv-Lower-Global\n"); SpirvLower::init(&module); @@ -271,7 +262,7 @@ bool SpirvLowerGlobal::runImpl(Module &module) { lowerAliasedVal(); lowerShaderRecordBuffer(); - return true; + return PreservedAnalyses::none(); } // ===================================================================================================================== @@ -598,9 +589,7 @@ void SpirvLowerGlobal::mapGlobalVariableToProxy(GlobalVariable *globalVar) { const auto &dataLayout = m_module->getDataLayout(); Type *globalVarTy = globalVar->getValueType(); - assert(m_entryPoint); Value *proxy = nullptr; - assert(m_entryPoint); removeConstantExpr(m_context, globalVar); // Handle special globals, regular allocas will be removed by SROA pass.
if (globalVar->getName().starts_with(RtName::HitAttribute)) { diff --git a/llpc/lower/llpcSpirvLowerGlobal.h b/llpc/lower/llpcSpirvLowerGlobal.h index 1d0af2dde3..6cf2efe167 100644 --- a/llpc/lower/llpcSpirvLowerGlobal.h +++ b/llpc/lower/llpcSpirvLowerGlobal.h @@ -49,7 +49,6 @@ class SpirvLowerGlobal : public SpirvLower, public llvm::PassInfoMixin TargetLibraryInfo & { - FunctionAnalysisManager &functionAnalysisManager = - analysisManager.getResult(module).getManager(); - return functionAnalysisManager.getResult(*m_entryPoint); - }); - return PreservedAnalyses::none(); -} - -// ===================================================================================================================== -// Executes constant folding SPIR-V lowering pass on the specified LLVM module. -// -// @param [in/out] module : LLVM module to be run on -bool SpirvLowerMathConstFolding::runImpl(Module &module, - const std::function &getTargetLibraryInfo) { LLVM_DEBUG(dbgs() << "Run the pass Spirv-Lower-Math-Const-Folding\n"); SpirvLowerMath::init(module); if (m_shaderStage == ShaderStageInvalid) - return false; + return PreservedAnalyses::all(); if (m_fp16DenormFlush || m_fp32DenormFlush || m_fp64DenormFlush) { // Do constant folding if we need flush denorm to zero. - auto &targetLibInfo = getTargetLibraryInfo(); + FunctionAnalysisManager &functionAnalysisManager = + analysisManager.getResult(module).getManager(); + auto &targetLibInfo = functionAnalysisManager.getResult(*m_entryPoint); auto &dataLayout = m_module->getDataLayout(); for (auto &block : *m_entryPoint) { @@ -253,7 +241,7 @@ bool SpirvLowerMathConstFolding::runImpl(Module &module, } } - return m_changed; + return m_changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); } // ===================================================================================================================== @@ -265,17 +253,6 @@ Function *SpirvLowerMathConstFolding::getEntryPoint() { #undef DEBUG_TYPE // DEBUG_TYPE_CONST_FOLDING #define DEBUG_TYPE DEBUG_TYPE_PRECISION -// ===================================================================================================================== -// Run precision (fast math flag) adjustment SPIR-V lowering pass on the specified LLVM module. -// -// @param [in/out] module : LLVM module to be run on (empty on entry) -// @param [in/out] analysisManager : Analysis manager to use for this transformation -PreservedAnalyses SpirvLowerMathPrecision::run(Module &module, ModuleAnalysisManager &analysisManager) { - if (runImpl(module)) - return PreservedAnalyses::none(); - return PreservedAnalyses::all(); -} - bool SpirvLowerMathPrecision::adjustExports(Module &module) { bool changed = false; for (auto &func : module.functions()) { @@ -393,13 +370,14 @@ bool SpirvLowerMathPrecision::propagateNoContract(Module &module, bool forward, // ===================================================================================================================== // Run precision (fast math flag) adjustment SPIR-V lowering pass on the specified LLVM module. 
// -// @param [in/out] module : LLVM module to be run on -bool SpirvLowerMathPrecision::runImpl(Module &module) { +// @param [in/out] module : LLVM module to be run on (empty on entry) +// @param [in/out] analysisManager : Analysis manager to use for this transformation +PreservedAnalyses SpirvLowerMathPrecision::run(Module &module, ModuleAnalysisManager &analysisManager) { LLVM_DEBUG(dbgs() << "Run the pass Spirv-Lower-Math-Precision\n"); SpirvLower::init(&module); if (m_shaderStage == ShaderStageInvalid) - return false; + return PreservedAnalyses::all(); bool forwardPropagate = false; bool backwardPropagate = false; @@ -446,7 +424,7 @@ bool SpirvLowerMathPrecision::runImpl(Module &module) { if (forwardPropagate || backwardPropagate) propagatedNoContract = propagateNoContract(module, forwardPropagate, backwardPropagate); - return adjustedExports || propagatedNoContract; + return (adjustedExports || propagatedNoContract) ? PreservedAnalyses::none() : PreservedAnalyses::all(); } #undef DEBUG_TYPE // DEBUG_TYPE_PRECISION @@ -458,21 +436,12 @@ bool SpirvLowerMathPrecision::runImpl(Module &module) { // @param [in/out] module : LLVM module to be run on (empty on entry) // @param [in/out] analysisManager : Analysis manager to use for this transformation PreservedAnalyses SpirvLowerMathFloatOp::run(Module &module, ModuleAnalysisManager &analysisManager) { - runImpl(module); - return PreservedAnalyses::none(); -} - -// ===================================================================================================================== -// Executes floating point optimisation SPIR-V lowering pass on the specified LLVM module. -// -// @param [in/out] module : LLVM module to be run on -bool SpirvLowerMathFloatOp::runImpl(Module &module) { LLVM_DEBUG(dbgs() << "Run the pass Spirv-Lower-Math-Float-Op\n"); SpirvLowerMath::init(module); visit(m_module); - return m_changed; + return m_changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); } // ===================================================================================================================== diff --git a/llpc/lower/llpcSpirvLowerMath.h b/llpc/lower/llpcSpirvLowerMath.h index 7720d0e9bf..200be86dc0 100644 --- a/llpc/lower/llpcSpirvLowerMath.h +++ b/llpc/lower/llpcSpirvLowerMath.h @@ -61,11 +61,6 @@ class SpirvLowerMathConstFolding : public SpirvLowerMath, public llvm::PassInfoM public: llvm::PreservedAnalyses run(llvm::Module &module, llvm::ModuleAnalysisManager &analysisManager); - // NOTE: We use a function parameter here to get the TargetLibraryInfo object. This is - // needed because the passes for the legacy and new pass managers use different ways to - // retrieve it. That also ensures the object is retrieved once the passes are properly - // initialized. This can be removed once the switch to the new pass manager is completed. 
- bool runImpl(llvm::Module &module, const std::function &getTargetLibraryInfo); static llvm::StringRef name() { return "Lower SPIR-V math constant folding"; } @@ -80,7 +75,6 @@ class SpirvLowerMathPrecision : public SpirvLower, public llvm::PassInfoMixin { public: llvm::PreservedAnalyses run(llvm::Module &module, llvm::ModuleAnalysisManager &analysisManager); - bool runImpl(llvm::Module &module); virtual void visitBinaryOperator(llvm::BinaryOperator &binaryOp); virtual void visitCallInst(llvm::CallInst &callInst); diff --git a/llpc/lower/llpcSpirvLowerMemoryOp.cpp b/llpc/lower/llpcSpirvLowerMemoryOp.cpp index 86d67bce4a..8c1b205ded 100644 --- a/llpc/lower/llpcSpirvLowerMemoryOp.cpp +++ b/llpc/lower/llpcSpirvLowerMemoryOp.cpp @@ -50,15 +50,6 @@ namespace Llpc { // @param [in/out] module : LLVM module to be run on // @param [in/out] analysisManager : Analysis manager to use for this transformation PreservedAnalyses SpirvLowerMemoryOp::run(Module &module, ModuleAnalysisManager &analysisManager) { - runImpl(module); - return PreservedAnalyses::none(); -} - -// ===================================================================================================================== -// Executes this SPIR-V lowering pass on the specified LLVM module. -// -// @param [in/out] module : LLVM module to be run on -bool SpirvLowerMemoryOp::runImpl(Module &module) { LLVM_DEBUG(dbgs() << "Run the pass Spirv-Lower-Memory-Op\n"); SpirvLower::init(&module); @@ -88,7 +79,7 @@ bool SpirvLowerMemoryOp::runImpl(Module &module) { LLVM_DEBUG(dbgs() << "After the pass Spirv-Lower-Memory-Op " << module); - return true; + return PreservedAnalyses::none(); } // ===================================================================================================================== diff --git a/llpc/lower/llpcSpirvLowerMemoryOp.h b/llpc/lower/llpcSpirvLowerMemoryOp.h index 18ec4227fb..7e0dd5721c 100644 --- a/llpc/lower/llpcSpirvLowerMemoryOp.h +++ b/llpc/lower/llpcSpirvLowerMemoryOp.h @@ -57,7 +57,6 @@ class SpirvLowerMemoryOp : public SpirvLower, public llvm::PassInfoMixin { public: llvm::PreservedAnalyses run(llvm::Module &module, llvm::ModuleAnalysisManager &analysisManager); - bool runImpl(llvm::Module &module); static llvm::StringRef name() { return "Lower SPIR-V memory operations"; } diff --git a/llpc/lower/llpcSpirvLowerRayQuery.cpp b/llpc/lower/llpcSpirvLowerRayQuery.cpp index 751b77d956..8cc360eaeb 100644 --- a/llpc/lower/llpcSpirvLowerRayQuery.cpp +++ b/llpc/lower/llpcSpirvLowerRayQuery.cpp @@ -291,15 +291,6 @@ SpirvLowerRayQuery::SpirvLowerRayQuery(bool rayQueryLibrary) // @param [in/out] module : LLVM module to be run on // @param [in/out] analysisManager : Analysis manager to use for this transformation PreservedAnalyses SpirvLowerRayQuery::run(Module &module, ModuleAnalysisManager &analysisManager) { - runImpl(module); - return PreservedAnalyses::none(); -} - -// ===================================================================================================================== -// Executes this SPIR-V lowering pass on the specified LLVM module. 
-// -// @param [in,out] module : LLVM module to be run on -bool SpirvLowerRayQuery::runImpl(Module &module) { LLVM_DEBUG(dbgs() << "Run the pass Spirv-Lower-ray-query\n"); SpirvLower::init(&module); createGlobalRayQueryObj(); @@ -314,7 +305,7 @@ bool SpirvLowerRayQuery::runImpl(Module &module) { processShaderFunction(func, getFuncOpcode(func)); } } - return true; + return PreservedAnalyses::none(); } // ===================================================================================================================== diff --git a/llpc/lower/llpcSpirvLowerRayQuery.h b/llpc/lower/llpcSpirvLowerRayQuery.h index e6274d3f2c..3becb9f588 100644 --- a/llpc/lower/llpcSpirvLowerRayQuery.h +++ b/llpc/lower/llpcSpirvLowerRayQuery.h @@ -117,7 +117,6 @@ class SpirvLowerRayQuery : public SpirvLower, public llvm::PassInfoMixin { public: llvm::PreservedAnalyses run(llvm::Module &module, llvm::ModuleAnalysisManager &analysisManager); - virtual bool runImpl(llvm::Module &module); static llvm::StringRef name() { return "Lower SPIR-V RayQueryPostInline operations"; } }; diff --git a/llpc/lower/llpcSpirvLowerRayTracing.cpp b/llpc/lower/llpcSpirvLowerRayTracing.cpp index 5e788af226..17f5b57db2 100644 --- a/llpc/lower/llpcSpirvLowerRayTracing.cpp +++ b/llpc/lower/llpcSpirvLowerRayTracing.cpp @@ -32,6 +32,7 @@ #include "llpcSpirvLowerRayTracing.h" #include "SPIRVInternal.h" #include "continuations/ContinuationsUtil.h" +#include "continuations/GpurtContext.h" #include "gpurt-compiler.h" #include "llpcContext.h" #include "llpcRayTracingContext.h" diff --git a/llpc/lower/llpcSpirvLowerTerminator.cpp b/llpc/lower/llpcSpirvLowerTerminator.cpp index 7a98c2683e..63ba34bf65 100644 --- a/llpc/lower/llpcSpirvLowerTerminator.cpp +++ b/llpc/lower/llpcSpirvLowerTerminator.cpp @@ -56,22 +56,13 @@ namespace Llpc { // @param [in/out] module : LLVM module to be run on (empty on entry) // @param [in/out] analysisManager : Analysis manager to use for this transformation PreservedAnalyses SpirvLowerTerminator::run(Module &module, ModuleAnalysisManager &analysisManager) { - runImpl(module); - return PreservedAnalyses::none(); -} - -// ===================================================================================================================== -// Executes this SPIR-V lowering pass on the specified LLVM module. -// -// @param [in/out] module : LLVM module to be run on -bool SpirvLowerTerminator::runImpl(Module &module) { LLVM_DEBUG(dbgs() << "Run the pass Spirv-Lower-Terminator\n"); SpirvLower::init(&module); // Kills are only valid in fragment shader model. if (m_shaderStage != ShaderStageFragment) - return false; + return PreservedAnalyses::all(); // Invoke handling of "kill" instructions. visit(m_module); @@ -86,7 +77,7 @@ bool SpirvLowerTerminator::runImpl(Module &module) { } m_instsForRemoval.clear(); - return changed; + return changed ? 
PreservedAnalyses::none() : PreservedAnalyses::all(); } // ===================================================================================================================== diff --git a/llpc/lower/llpcSpirvLowerTerminator.h b/llpc/lower/llpcSpirvLowerTerminator.h index 0711b6d663..f6a107e732 100644 --- a/llpc/lower/llpcSpirvLowerTerminator.h +++ b/llpc/lower/llpcSpirvLowerTerminator.h @@ -43,7 +43,6 @@ class SpirvLowerTerminator : public SpirvLower, public llvm::InstVisitor { public: llvm::PreservedAnalyses run(llvm::Module &module, llvm::ModuleAnalysisManager &analysisManager); - bool runImpl(llvm::Module &module); static llvm::StringRef name() { return "Lower SPIR-V terminator"; } diff --git a/llpc/lower/llpcSpirvLowerTranslator.cpp b/llpc/lower/llpcSpirvLowerTranslator.cpp index 380617a67b..ba507f9c9f 100644 --- a/llpc/lower/llpcSpirvLowerTranslator.cpp +++ b/llpc/lower/llpcSpirvLowerTranslator.cpp @@ -47,15 +47,6 @@ using namespace Llpc; // @param [in/out] module : LLVM module to be run on (empty on entry) // @param [in/out] analysisManager : Analysis manager to use for this transformation llvm::PreservedAnalyses SpirvLowerTranslator::run(llvm::Module &module, llvm::ModuleAnalysisManager &analysisManager) { - runImpl(module); - return llvm::PreservedAnalyses::none(); -} - -// ===================================================================================================================== -// Run the pass on the specified LLVM module. -// -// @param [in/out] module : LLVM module to be run on (empty on entry) -bool SpirvLowerTranslator::runImpl(Module &module) { LLVM_DEBUG(dbgs() << "Run the pass Spirv-Lower-Translator\n"); SpirvLower::init(&module); @@ -68,7 +59,7 @@ bool SpirvLowerTranslator::runImpl(Module &module) { // Translate SPIR-V binary to machine-independent LLVM module translateSpirvToLlvm(m_shaderInfo, &module); - return true; + return PreservedAnalyses::none(); } // ===================================================================================================================== @@ -116,8 +107,8 @@ void SpirvLowerTranslator::translateSpirvToLlvm(const PipelineShaderInfo *shader } if (!readSpirv(context->getBuilder(), &(moduleData->usage), &(shaderInfo->options), spirvStream, - convertToExecModel(entryStage), shaderInfo->pEntryTarget, specConstMap, convertingSamplers, module, - errMsg)) { + convertToExecModel(entryStage), shaderInfo->pEntryTarget, specConstMap, convertingSamplers, + m_globalVarPrefix, module, errMsg)) { report_fatal_error(Twine("Failed to translate SPIR-V to LLVM (") + getShaderStageName(static_cast(entryStage)) + " shader): " + errMsg, false); diff --git a/llpc/lower/llpcSpirvLowerTranslator.h b/llpc/lower/llpcSpirvLowerTranslator.h index 161754a959..78389ac002 100644 --- a/llpc/lower/llpcSpirvLowerTranslator.h +++ b/llpc/lower/llpcSpirvLowerTranslator.h @@ -44,10 +44,10 @@ class SpirvLowerTranslator : public SpirvLower, public llvm::PassInfoMixinSetInsertPoint(clearBlock(func)); createContStackLoad(func); return; + } else if (funcName.starts_with("_AmdEnqueue") || funcName.starts_with("_AmdWaitEnqueue")) { + m_builder->SetInsertPoint(clearBlock(func)); + createEnqueue(func); + return; } // Create implementation for intrinsic functions. 
@@ -725,6 +730,13 @@ void SpirvProcessGpuRtLibrary::createGetKnownUnsetRayFlags(llvm::Function *func) m_builder->CreateRet(m_builder->getInt32(0)); } +// ===================================================================================================================== +// Fill in function of AmdExtDispatchThreadIdFlat +// +// @param func : The function to create +void SpirvProcessGpuRtLibrary::createDispatchThreadIdFlat(llvm::Function *func) { + m_builder->CreateRet(m_builder->create()); +} // ===================================================================================================================== // Fill in function to allocate continuation stack pointer // @@ -798,4 +810,40 @@ void SpirvProcessGpuRtLibrary::createContStackStore(llvm::Function *func) { m_builder->CreateRetVoid(); } +// ===================================================================================================================== +// Fill in function to enqueue shader +// +// @param func : The function to create +void SpirvProcessGpuRtLibrary::createEnqueue(Function *func) { + auto funcName = func->getName(); + + Value *addr = m_builder->CreateLoad(m_builder->getInt32Ty(), func->getArg(0)); + + SmallVector tailArgs; + // _AmdEnqueueTraversal and _AmdWaitEnqueueRayGen do not have return-address. + bool hasRetAddrArg = !funcName.contains("RayGen") && !funcName.contains("Traversal"); + bool hasWaitMaskArg = funcName.contains("Wait"); + if (hasRetAddrArg) { + // Skip csp and waitMask + unsigned retAddrArgIdx = hasWaitMaskArg ? 3 : 2; + tailArgs.push_back(m_builder->CreateLoad(m_builder->getInt32Ty(), func->getArg(retAddrArgIdx))); + } else { + tailArgs.push_back(PoisonValue::get(m_builder->getInt32Ty())); + } + // Get shader-index from system-data. + unsigned systemDataArgIdx = 2 + (hasRetAddrArg ? 1 : 0) + (hasWaitMaskArg ? 1 : 0); + tailArgs.push_back(m_builder->CreateNamedCall("_cont_GetLocalRootIndex", m_builder->getInt32Ty(), + {func->getArg(systemDataArgIdx)}, {})); + // Process system-data and arguments after. + unsigned argIdx = systemDataArgIdx; + while (argIdx < func->arg_size()) { + tailArgs.push_back(m_builder->CreateLoad(getFuncArgPtrElementType(func, argIdx), func->getArg(argIdx))); + argIdx++; + } + + // TODO: pass the levelMask correctly. 
+ m_builder->create(addr, -1, PoisonValue::get(StructType::get(*m_context, {})), tailArgs); + m_builder->CreateUnreachable(); +} + } // namespace Llpc diff --git a/llpc/lower/llpcSpirvProcessGpuRtLibrary.h b/llpc/lower/llpcSpirvProcessGpuRtLibrary.h index 8acfcff410..0760df729a 100644 --- a/llpc/lower/llpcSpirvProcessGpuRtLibrary.h +++ b/llpc/lower/llpcSpirvProcessGpuRtLibrary.h @@ -86,6 +86,7 @@ class SpirvProcessGpuRtLibrary : public SpirvLower, public llvm::PassInfoMixin], align 16, addrspace(5) diff --git a/llpc/test/shaderdb/core/OOB_Check_Load_Array_lit.frag b/llpc/test/shaderdb/core/OOB_Check_Load_Array_lit.frag index c649dd3d24..3ad183017a 100644 --- a/llpc/test/shaderdb/core/OOB_Check_Load_Array_lit.frag +++ b/llpc/test/shaderdb/core/OOB_Check_Load_Array_lit.frag @@ -4,7 +4,7 @@ // BEGIN_SHADERTEST /* -; RUN: amdllpc -v %gfxip %s -enable-scratch-bounds-checks | FileCheck -check-prefix=SHADERTEST %s +; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST: .[[entry:[a-z0-9]+]]: ; SHADERTEST: %[[arr:[a-z0-9]+]] = alloca [2048 x [2 x <4 x float>]], align 16, addrspace(5) diff --git a/llpc/test/shaderdb/core/OOB_Check_Load_Array_with_Struct_lit.frag b/llpc/test/shaderdb/core/OOB_Check_Load_Array_with_Struct_lit.frag index 78fea7c02a..b851d4e295 100644 --- a/llpc/test/shaderdb/core/OOB_Check_Load_Array_with_Struct_lit.frag +++ b/llpc/test/shaderdb/core/OOB_Check_Load_Array_with_Struct_lit.frag @@ -4,7 +4,7 @@ // BEGIN_SHADERTEST /* -; RUN: amdllpc -v %gfxip %s -enable-scratch-bounds-checks | FileCheck -check-prefix=SHADERTEST %s +; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST: .[[entry:[a-z0-9]+]]: ; SHADERTEST: %[[arr:[a-z0-9]+]] = alloca [5 x { <4 x float> }], align 16, addrspace(5) diff --git a/llpc/test/shaderdb/core/OOB_Check_Load_Matrix_Vector_lit.frag b/llpc/test/shaderdb/core/OOB_Check_Load_Matrix_Vector_lit.frag index 30c904a173..5c23ba0f04 100644 --- a/llpc/test/shaderdb/core/OOB_Check_Load_Matrix_Vector_lit.frag +++ b/llpc/test/shaderdb/core/OOB_Check_Load_Matrix_Vector_lit.frag @@ -4,7 +4,7 @@ // BEGIN_SHADERTEST /* -; RUN: amdllpc -v %gfxip %s -enable-scratch-bounds-checks | FileCheck -check-prefix=SHADERTEST %s +; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST: .[[entry:[a-z0-9]+]]: ; SHADERTEST: %[[mat:[a-z0-9]+]] = alloca [4 x <4 x float>], align 16, addrspace(5) diff --git a/llpc/test/shaderdb/core/OOB_Check_Load_Matrix_lit.frag b/llpc/test/shaderdb/core/OOB_Check_Load_Matrix_lit.frag index 3f7969390d..ef1e6eefe0 100644 --- a/llpc/test/shaderdb/core/OOB_Check_Load_Matrix_lit.frag +++ b/llpc/test/shaderdb/core/OOB_Check_Load_Matrix_lit.frag @@ -4,7 +4,7 @@ // BEGIN_SHADERTEST /* -; RUN: amdllpc -v %gfxip %s -enable-scratch-bounds-checks | FileCheck -check-prefix=SHADERTEST %s +; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST: .[[entry:[a-z0-9]+]]: ; SHADERTEST: %[[arr:[a-z0-9]+]] = alloca [4 x <4 x float>], align 16, addrspace(5) diff --git a/llpc/test/shaderdb/core/OOB_Check_Load_Nested_Struct_lit.frag b/llpc/test/shaderdb/core/OOB_Check_Load_Nested_Struct_lit.frag index df1d1959bd..d7d66f58be 100644 --- a/llpc/test/shaderdb/core/OOB_Check_Load_Nested_Struct_lit.frag +++ 
b/llpc/test/shaderdb/core/OOB_Check_Load_Nested_Struct_lit.frag @@ -4,7 +4,7 @@ // BEGIN_SHADERTEST /* -; RUN: amdllpc -v %gfxip %s -enable-scratch-bounds-checks | FileCheck -check-prefix=SHADERTEST %s +; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST: .[[entry:[a-z0-9]+]]: ; SHADERTEST: %[[arr:[a-z0-9]+]] = alloca [5 x { [10 x <4 x float>], [12 x { [4 x <4 x float>] }] }], align 16, addrspace(5) diff --git a/llpc/test/shaderdb/core/OOB_Check_Load_Struct_lit.frag b/llpc/test/shaderdb/core/OOB_Check_Load_Struct_lit.frag index 6b9508d52f..2c4fdbb9f9 100644 --- a/llpc/test/shaderdb/core/OOB_Check_Load_Struct_lit.frag +++ b/llpc/test/shaderdb/core/OOB_Check_Load_Struct_lit.frag @@ -4,7 +4,7 @@ // BEGIN_SHADERTEST /* -; RUN: amdllpc -v %gfxip %s -enable-scratch-bounds-checks | FileCheck -check-prefix=SHADERTEST %s +; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST: .[[entry:[a-z0-9]+]]: ; SHADERTEST: %[[arr:[a-z0-9]+]] = alloca [5 x { [10 x <4 x float>], [12 x <4 x float>] }], align 16, addrspace(5) diff --git a/llpc/test/shaderdb/core/OOB_Check_Load_Vector_lit.frag b/llpc/test/shaderdb/core/OOB_Check_Load_Vector_lit.frag index 7800e42974..18e0f93283 100644 --- a/llpc/test/shaderdb/core/OOB_Check_Load_Vector_lit.frag +++ b/llpc/test/shaderdb/core/OOB_Check_Load_Vector_lit.frag @@ -4,7 +4,7 @@ // BEGIN_SHADERTEST /* -; RUN: amdllpc -v %gfxip %s -enable-scratch-bounds-checks | FileCheck -check-prefix=SHADERTEST %s +; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST: .[[entry:[a-z0-9]+]]: ; SHADERTEST: %[[arr:[a-z0-9]+]] = alloca <4 x float>, align 16, addrspace(5) diff --git a/llpc/test/shaderdb/core/OOB_Check_Multiple_Load_lit.frag b/llpc/test/shaderdb/core/OOB_Check_Multiple_Load_lit.frag index 5b9135fc1f..5d97db6c6e 100644 --- a/llpc/test/shaderdb/core/OOB_Check_Multiple_Load_lit.frag +++ b/llpc/test/shaderdb/core/OOB_Check_Multiple_Load_lit.frag @@ -4,7 +4,7 @@ // BEGIN_SHADERTEST /* -; RUN: amdllpc -v %gfxip %s -enable-scratch-bounds-checks | FileCheck -check-prefix=SHADERTEST %s +; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST: .[[entry:[a-z0-9]+]]: ; SHADERTEST: %[[arr:[a-z0-9]+]] = alloca <4 x float>, align 16, addrspace(5) diff --git a/llpc/test/shaderdb/core/OOB_Check_Optimization_lit.frag b/llpc/test/shaderdb/core/OOB_Check_Optimization_lit.frag index 9416f4f53c..b9a596f97e 100644 --- a/llpc/test/shaderdb/core/OOB_Check_Optimization_lit.frag +++ b/llpc/test/shaderdb/core/OOB_Check_Optimization_lit.frag @@ -3,7 +3,7 @@ // BEGIN_SHADERTEST /* -; RUN: amdllpc -v %gfxip %s -enable-scratch-bounds-checks | FileCheck -check-prefix=SHADERTEST %s +; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} final pipeline module info ; SHADERTEST: define dllexport amdgpu_ps {{.*}} @_amdgpu_ps_main ; SHADERTEST-NOT: phi diff --git a/llpc/test/shaderdb/core/OOB_Check_Store_Array_lit.frag b/llpc/test/shaderdb/core/OOB_Check_Store_Array_lit.frag index 736c9c1209..3f0326b970 100644 --- a/llpc/test/shaderdb/core/OOB_Check_Store_Array_lit.frag +++ b/llpc/test/shaderdb/core/OOB_Check_Store_Array_lit.frag @@ -4,7 +4,7 @@ // BEGIN_SHADERTEST /* -; RUN: amdllpc -v %gfxip %s 
-enable-scratch-bounds-checks | FileCheck -check-prefix=SHADERTEST %s +; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST: .[[entry:[a-z0-9]+]]: ; SHADERTEST: %[[arr:[a-z0-9]+]] = alloca [2048 x [2 x <4 x float>]], align 16, addrspace(5) diff --git a/llpc/test/shaderdb/core/OOB_Check_Store_Struct_lit.frag b/llpc/test/shaderdb/core/OOB_Check_Store_Struct_lit.frag index 6eed2029ca..64968b9dd5 100644 --- a/llpc/test/shaderdb/core/OOB_Check_Store_Struct_lit.frag +++ b/llpc/test/shaderdb/core/OOB_Check_Store_Struct_lit.frag @@ -4,7 +4,7 @@ // BEGIN_SHADERTEST /* -; RUN: amdllpc -v %gfxip %s -enable-scratch-bounds-checks | FileCheck -check-prefix=SHADERTEST %s +; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST: .[[entry:[a-z0-9]+]]: ; SHADERTEST: %[[arr:[a-z0-9]+]] = alloca [5 x { [10 x <4 x float>] }], align 16, addrspace(5) diff --git a/llpc/test/shaderdb/core/OpFOrdEqual_TestVec3_lit.frag b/llpc/test/shaderdb/core/OpFOrdEqual_TestVec3_lit.frag index 7f127ad484..110b133f0d 100644 --- a/llpc/test/shaderdb/core/OpFOrdEqual_TestVec3_lit.frag +++ b/llpc/test/shaderdb/core/OpFOrdEqual_TestVec3_lit.frag @@ -20,7 +20,7 @@ void main() // CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(7) @lgc.load.buffer.desc(i64 0, i32 0, i32 0, i32 0) // CHECK-NEXT: [[TMP1:%.*]] = call ptr @llvm.invariant.start.p7(i64 -1, ptr addrspace(7) [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = load <3 x float>, ptr addrspace(7) [[TMP0]], align 16 -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds <{ [3 x float], [4 x i8], [3 x float] }>, ptr addrspace(7) [[TMP0]], i32 0, i32 2 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds {{i8|<{ [[]3 x float], [[]4 x i8], [[]3 x float] }>}}, ptr addrspace(7) [[TMP0]], i32 {{16|0, i32 2}} // CHECK-NEXT: [[TMP4:%.*]] = load <3 x float>, ptr addrspace(7) [[TMP3]], align 16 // CHECK-NEXT: [[TMP5:%.*]] = extractelement <3 x float> [[TMP2]], i64 0 // CHECK-NEXT: [[TMP6:%.*]] = extractelement <3 x float> [[TMP4]], i64 0 diff --git a/llpc/test/shaderdb/core/OpFOrdNotEqual_TestVec3_lit.frag b/llpc/test/shaderdb/core/OpFOrdNotEqual_TestVec3_lit.frag index 651505b77c..05b9e49a03 100644 --- a/llpc/test/shaderdb/core/OpFOrdNotEqual_TestVec3_lit.frag +++ b/llpc/test/shaderdb/core/OpFOrdNotEqual_TestVec3_lit.frag @@ -20,7 +20,7 @@ void main() // CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(7) @lgc.load.buffer.desc(i64 0, i32 0, i32 0, i32 0) // CHECK-NEXT: [[TMP1:%.*]] = call ptr @llvm.invariant.start.p7(i64 -1, ptr addrspace(7) [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = load <3 x float>, ptr addrspace(7) [[TMP0]], align 16 -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds <{ [3 x float], [4 x i8], [3 x float] }>, ptr addrspace(7) [[TMP0]], i32 0, i32 2 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds {{i8|<{ [[]3 x float], [[]4 x i8], [[]3 x float] }>}}, ptr addrspace(7) [[TMP0]], i32 {{16|0, i32 2}} // CHECK-NEXT: [[TMP4:%.*]] = load <3 x float>, ptr addrspace(7) [[TMP3]], align 16 // CHECK-NEXT: [[TMP5:%.*]] = extractelement <3 x float> [[TMP2]], i64 0 // CHECK-NEXT: [[TMP6:%.*]] = extractelement <3 x float> [[TMP4]], i64 0 diff --git a/llpc/test/shaderdb/core/OpIEqual_TestIvec2_lit.frag b/llpc/test/shaderdb/core/OpIEqual_TestIvec2_lit.frag index b209a31aa1..01adb95c6a 100644 --- a/llpc/test/shaderdb/core/OpIEqual_TestIvec2_lit.frag +++ b/llpc/test/shaderdb/core/OpIEqual_TestIvec2_lit.frag @@ -20,7 
+20,7 @@ void main() // SHADERTEST-NEXT: [[TMP0:%.*]] = call ptr addrspace(7) @lgc.load.buffer.desc(i64 0, i32 0, i32 0, i32 0) // SHADERTEST-NEXT: [[TMP1:%.*]] = call ptr @llvm.invariant.start.p7(i64 -1, ptr addrspace(7) [[TMP0]]) // SHADERTEST-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(7) [[TMP0]], align 8 -// SHADERTEST-NEXT: [[TMP3:%.*]] = getelementptr inbounds <{ [2 x i32], [2 x i32] }>, ptr addrspace(7) [[TMP0]], i32 0, i32 1 +// SHADERTEST-NEXT: [[TMP3:%.*]] = getelementptr inbounds {{i8|<{ [[]2 x i32], [[]2 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{8|0, i32 1}} // SHADERTEST-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr addrspace(7) [[TMP3]], align 8 // SHADERTEST-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP2]], i64 0 // SHADERTEST-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP4]], i64 0 diff --git a/llpc/test/shaderdb/core/OpINotEqual_TestIvec2_lit.frag b/llpc/test/shaderdb/core/OpINotEqual_TestIvec2_lit.frag index d423ef6ff8..d58cc30b47 100644 --- a/llpc/test/shaderdb/core/OpINotEqual_TestIvec2_lit.frag +++ b/llpc/test/shaderdb/core/OpINotEqual_TestIvec2_lit.frag @@ -20,7 +20,7 @@ void main() // SHADERTEST-NEXT: [[TMP0:%.*]] = call ptr addrspace(7) @lgc.load.buffer.desc(i64 0, i32 0, i32 0, i32 0) // SHADERTEST-NEXT: [[TMP1:%.*]] = call ptr @llvm.invariant.start.p7(i64 -1, ptr addrspace(7) [[TMP0]]) // SHADERTEST-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(7) [[TMP0]], align 8 -// SHADERTEST-NEXT: [[TMP3:%.*]] = getelementptr inbounds <{ [2 x i32], [2 x i32] }>, ptr addrspace(7) [[TMP0]], i32 0, i32 1 +// SHADERTEST-NEXT: [[TMP3:%.*]] = getelementptr inbounds {{i8|<{ [[]2 x i32], [[]2 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{8|0, i32 1}} // SHADERTEST-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr addrspace(7) [[TMP3]], align 8 // SHADERTEST-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP2]], i64 0 // SHADERTEST-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP4]], i64 0 diff --git a/llpc/test/shaderdb/core/OpLogicalNotEqual_TestGeneral_lit.frag b/llpc/test/shaderdb/core/OpLogicalNotEqual_TestGeneral_lit.frag index 70bf94b37a..af55f04005 100644 --- a/llpc/test/shaderdb/core/OpLogicalNotEqual_TestGeneral_lit.frag +++ b/llpc/test/shaderdb/core/OpLogicalNotEqual_TestGeneral_lit.frag @@ -29,10 +29,10 @@ void main() // SHADERTEST-NEXT: .entry: // SHADERTEST-NEXT: [[TMP0:%.*]] = call ptr addrspace(7) @lgc.load.buffer.desc(i64 0, i32 0, i32 0, i32 2) // SHADERTEST-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(7) [[TMP0]], align 4 -// SHADERTEST-NEXT: [[TMP2:%.*]] = getelementptr inbounds <{ i32, [4 x i8], [2 x i32] }>, ptr addrspace(7) [[TMP0]], i32 0, i32 2, i32 0 +// SHADERTEST-NEXT: [[TMP2:%.*]] = getelementptr inbounds {{i8|<{ i32, [[]4 x i8], [[]2 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{8|0, i32 2, i32 0}} // SHADERTEST-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(7) [[TMP2]], align 4 // SHADERTEST-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP1]], [[TMP3]] -// SHADERTEST-NEXT: [[TMP5:%.*]] = getelementptr inbounds <{ i32, [4 x i8], [2 x i32] }>, ptr addrspace(7) [[TMP0]], i32 0, i32 2, i32 1 +// SHADERTEST-NEXT: [[TMP5:%.*]] = getelementptr inbounds {{i8|<{ i32, [[]4 x i8], [[]2 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{12|0, i32 2, i32 1}} // SHADERTEST-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(7) [[TMP5]], align 4 // SHADERTEST-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP1]], [[TMP6]] // SHADERTEST-NEXT: [[TMP8:%.*]] = and i1 [[TMP4]], [[TMP7]] diff --git a/llpc/test/shaderdb/core/OpPtrEqualTest.spvasm b/llpc/test/shaderdb/core/OpPtrEqualTest.spvasm 
index 820c25992e..2d521e743f 100644 --- a/llpc/test/shaderdb/core/OpPtrEqualTest.spvasm +++ b/llpc/test/shaderdb/core/OpPtrEqualTest.spvasm @@ -89,11 +89,11 @@ ; SHADERTEST-NEXT: .entry: ; SHADERTEST-NEXT: [[TMP0:%.*]] = call ptr addrspace(7) @lgc.load.buffer.desc(i64 0, i32 2, i32 0, i32 2) ; SHADERTEST-NEXT: store i32 1, ptr addrspace(7) [[TMP0]], align 4 -; SHADERTEST-NEXT: [[TMP1:%.*]] = getelementptr <{ [4294967295 x i32] }>, ptr addrspace(7) [[TMP0]], i32 0, i32 0, i32 1 +; SHADERTEST-NEXT: [[TMP1:%.*]] = getelementptr {{i8|<{ [[]4294967295 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{4|0, i32 0, i32 1}} ; SHADERTEST-NEXT: store i32 1, ptr addrspace(7) [[TMP1]], align 4 -; SHADERTEST-NEXT: [[TMP2:%.*]] = getelementptr <{ [4294967295 x i32] }>, ptr addrspace(7) [[TMP0]], i32 0, i32 0, i32 2 +; SHADERTEST-NEXT: [[TMP2:%.*]] = getelementptr {{i8|<{ [[]4294967295 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{8|0, i32 0, i32 2}} ; SHADERTEST-NEXT: store i32 1, ptr addrspace(7) [[TMP2]], align 4 -; SHADERTEST-NEXT: [[TMP3:%.*]] = getelementptr <{ [4294967295 x i32] }>, ptr addrspace(7) [[TMP0]], i32 0, i32 0, i32 3 +; SHADERTEST-NEXT: [[TMP3:%.*]] = getelementptr {{i8|<{ [[]4294967295 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{12|0, i32 0, i32 3}} ; SHADERTEST-NEXT: store i32 0, ptr addrspace(7) [[TMP3]], align 4 ; SHADERTEST-NEXT: ret void ; diff --git a/llpc/test/shaderdb/core/OpSLessThanEqual_TestSignedAndUnsigned_lit.frag b/llpc/test/shaderdb/core/OpSLessThanEqual_TestSignedAndUnsigned_lit.frag index 21cdb8f20d..7956d7e89c 100644 --- a/llpc/test/shaderdb/core/OpSLessThanEqual_TestSignedAndUnsigned_lit.frag +++ b/llpc/test/shaderdb/core/OpSLessThanEqual_TestSignedAndUnsigned_lit.frag @@ -25,7 +25,7 @@ void main() // SHADERTEST-NEXT: [[TMP0:%.*]] = call ptr addrspace(7) @lgc.load.buffer.desc(i64 0, i32 0, i32 0, i32 0) // SHADERTEST-NEXT: [[TMP1:%.*]] = call ptr @llvm.invariant.start.p7(i64 -1, ptr addrspace(7) [[TMP0]]) // SHADERTEST-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(7) [[TMP0]], align 8 -// SHADERTEST-NEXT: [[TMP3:%.*]] = getelementptr inbounds <{ [2 x i32], [2 x i32], [2 x i32], [2 x i32] }>, ptr addrspace(7) [[TMP0]], i32 0, i32 1 +// SHADERTEST-NEXT: [[TMP3:%.*]] = getelementptr inbounds {{i8|<{ [[]2 x i32], [[]2 x i32], [[]2 x i32], [[]2 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{8|0, i32 1}} // SHADERTEST-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr addrspace(7) [[TMP3]], align 8 // SHADERTEST-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP2]], i64 0 // SHADERTEST-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP4]], i64 0 diff --git a/llpc/test/shaderdb/core/OpSLessThan_TestSignedAndUnsigned_lit.frag b/llpc/test/shaderdb/core/OpSLessThan_TestSignedAndUnsigned_lit.frag index f9f18d7d0f..2bb60ce1df 100644 --- a/llpc/test/shaderdb/core/OpSLessThan_TestSignedAndUnsigned_lit.frag +++ b/llpc/test/shaderdb/core/OpSLessThan_TestSignedAndUnsigned_lit.frag @@ -25,7 +25,7 @@ void main() // SHADERTEST-NEXT: [[TMP0:%.*]] = call ptr addrspace(7) @lgc.load.buffer.desc(i64 0, i32 0, i32 0, i32 0) // SHADERTEST-NEXT: [[TMP1:%.*]] = call ptr @llvm.invariant.start.p7(i64 -1, ptr addrspace(7) [[TMP0]]) // SHADERTEST-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(7) [[TMP0]], align 8 -// SHADERTEST-NEXT: [[TMP3:%.*]] = getelementptr inbounds <{ [2 x i32], [2 x i32], [2 x i32], [2 x i32] }>, ptr addrspace(7) [[TMP0]], i32 0, i32 1 +// SHADERTEST-NEXT: [[TMP3:%.*]] = getelementptr inbounds {{i8|<{ [[]2 x i32], [[]2 x i32], [[]2 x i32], [[]2 x i32] }>}}, ptr addrspace(7) 
[[TMP0]], i32 {{8|0, i32 1}} // SHADERTEST-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr addrspace(7) [[TMP3]], align 8 // SHADERTEST-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP2]], i64 0 // SHADERTEST-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP4]], i64 0 diff --git a/llpc/test/shaderdb/extensions/ExtShaderInt64_TestRelationalOp_lit.frag b/llpc/test/shaderdb/extensions/ExtShaderInt64_TestRelationalOp_lit.frag index 0136adcbc0..ededdfb7a3 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderInt64_TestRelationalOp_lit.frag +++ b/llpc/test/shaderdb/extensions/ExtShaderInt64_TestRelationalOp_lit.frag @@ -36,9 +36,9 @@ void main() // SHADERTEST-NEXT: .entry: // SHADERTEST-NEXT: [[TMP0:%.*]] = call ptr addrspace(7) @lgc.load.buffer.desc(i64 0, i32 0, i32 0, i32 0) // SHADERTEST-NEXT: [[TMP1:%.*]] = call ptr @llvm.invariant.start.p7(i64 -1, ptr addrspace(7) [[TMP0]]) -// SHADERTEST-NEXT: [[TMP2:%.*]] = getelementptr inbounds <{ i64, i64, [16 x i8], [3 x i64], [8 x i8], [3 x i64] }>, ptr addrspace(7) [[TMP0]], i32 0, i32 3 +// SHADERTEST-NEXT: [[TMP2:%.*]] = getelementptr inbounds {{i8|<{ i64, i64, [[]16 x i8], [[]3 x i64], [[]8 x i8], [[]3 x i64] }>}}, ptr addrspace(7) [[TMP0]], i32 {{32|0, i32 3}} // SHADERTEST-NEXT: [[TMP3:%.*]] = load <3 x i64>, ptr addrspace(7) [[TMP2]], align 32 -// SHADERTEST-NEXT: [[TMP4:%.*]] = getelementptr inbounds <{ i64, i64, [16 x i8], [3 x i64], [8 x i8], [3 x i64] }>, ptr addrspace(7) [[TMP0]], i32 0, i32 5 +// SHADERTEST-NEXT: [[TMP4:%.*]] = getelementptr inbounds {{i8|<{ i64, i64, [[]16 x i8], [[]3 x i64], [[]8 x i8], [[]3 x i64] }>}}, ptr addrspace(7) [[TMP0]], i32 {{64|0, i32 5}} // SHADERTEST-NEXT: [[TMP5:%.*]] = load <3 x i64>, ptr addrspace(7) [[TMP4]], align 32 // SHADERTEST-NEXT: [[TMP6:%.*]] = extractelement <3 x i64> [[TMP3]], i64 0 // SHADERTEST-NEXT: [[TMP7:%.*]] = extractelement <3 x i64> [[TMP5]], i64 0 @@ -114,7 +114,7 @@ void main() // SHADERTEST: 66: // SHADERTEST-NEXT: [[DOT022_IN:%.*]] = phi <3 x i1> [ [[TMP50]], [[TMP36]] ], [ [[TMP65]], [[TMP51]] ] // SHADERTEST-NEXT: [[TMP67:%.*]] = load i64, ptr addrspace(7) [[TMP0]], align 8 -// SHADERTEST-NEXT: [[TMP68:%.*]] = getelementptr inbounds <{ i64, i64, [16 x i8], [3 x i64], [8 x i8], [3 x i64] }>, ptr addrspace(7) [[TMP0]], i32 0, i32 1 +// SHADERTEST-NEXT: [[TMP68:%.*]] = getelementptr inbounds {{i8|<{ i64, i64, [[]16 x i8], [[]3 x i64], [[]8 x i8], [[]3 x i64] }>}}, ptr addrspace(7) [[TMP0]], i32 {{8|0, i32 1}} // SHADERTEST-NEXT: [[TMP69:%.*]] = load i64, ptr addrspace(7) [[TMP68]], align 8 // SHADERTEST-NEXT: [[TMP70:%.*]] = icmp ne i64 [[TMP67]], [[TMP69]] // SHADERTEST-NEXT: [[COND_FREEZE4:%.*]] = freeze i1 [[TMP70]] diff --git a/llpc/test/shaderdb/extensions/ExtShaderVote_TestGeneral_lit.frag b/llpc/test/shaderdb/extensions/ExtShaderVote_TestGeneral_lit.frag index e0feba7c85..3a79067deb 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderVote_TestGeneral_lit.frag +++ b/llpc/test/shaderdb/extensions/ExtShaderVote_TestGeneral_lit.frag @@ -34,7 +34,7 @@ void main(void) ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results -; SHADERTEST: call i1 (...) @lgc.create.subgroup.any.i1( +; SHADERTEST: call i1 @lgc.subgroup.any( ; SHADERTEST: call i1 (...) @lgc.create.subgroup.all.i1( ; SHADERTEST: call i1 (...) 
@lgc.create.subgroup.all.equal.i1( ; SHADERTEST: call void @lgc.output.export.generic{{.*}}v2f32 diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestFmaDouble_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestFmaDouble_lit.frag index 696f3ba15c..07064eab48 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestFmaDouble_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestFmaDouble_lit.frag @@ -23,16 +23,16 @@ void main() // CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(7) @lgc.load.buffer.desc(i64 0, i32 0, i32 0, i32 0) // CHECK-NEXT: [[TMP1:%.*]] = call ptr @llvm.invariant.start.p7(i64 -1, ptr addrspace(7) [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = load double, ptr addrspace(7) [[TMP0]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds <{ double, double, double, [8 x i8], [3 x double], [8 x i8], [3 x double], [8 x i8], [3 x double] }>, ptr addrspace(7) [[TMP0]], i32 0, i32 1 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds {{i8|<{ double, double, double, [[]8 x i8], [[]3 x double], [[]8 x i8], [[]3 x double], [[]8 x i8], [[]3 x double] }>}}, ptr addrspace(7) [[TMP0]], i32 {{8|0, i32 1}} // CHECK-NEXT: [[TMP4:%.*]] = load double, ptr addrspace(7) [[TMP3]], align 8 -// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds <{ double, double, double, [8 x i8], [3 x double], [8 x i8], [3 x double], [8 x i8], [3 x double] }>, ptr addrspace(7) [[TMP0]], i32 0, i32 2 +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds {{i8|<{ double, double, double, [[]8 x i8], [[]3 x double], [[]8 x i8], [[]3 x double], [[]8 x i8], [[]3 x double] }>}}, ptr addrspace(7) [[TMP0]], i32 {{16|0, i32 2}} // CHECK-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(7) [[TMP5]], align 8 // CHECK-NEXT: [[TMP7:%.*]] = call reassoc nnan nsz arcp contract double (...) @lgc.create.fma.f64(double [[TMP2]], double [[TMP4]], double [[TMP6]]) -// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds <{ double, double, double, [8 x i8], [3 x double], [8 x i8], [3 x double], [8 x i8], [3 x double] }>, ptr addrspace(7) [[TMP0]], i32 0, i32 4 +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds {{i8|<{ double, double, double, [[]8 x i8], [[]3 x double], [[]8 x i8], [[]3 x double], [[]8 x i8], [[]3 x double] }>}}, ptr addrspace(7) [[TMP0]], i32 {{32|0, i32 4}} // CHECK-NEXT: [[TMP9:%.*]] = load <3 x double>, ptr addrspace(7) [[TMP8]], align 32 -// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds <{ double, double, double, [8 x i8], [3 x double], [8 x i8], [3 x double], [8 x i8], [3 x double] }>, ptr addrspace(7) [[TMP0]], i32 0, i32 6 +// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds {{i8|<{ double, double, double, [[]8 x i8], [[]3 x double], [[]8 x i8], [[]3 x double], [[]8 x i8], [[]3 x double] }>}}, ptr addrspace(7) [[TMP0]], i32 {{64|0, i32 6}} // CHECK-NEXT: [[TMP11:%.*]] = load <3 x double>, ptr addrspace(7) [[TMP10]], align 32 -// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds <{ double, double, double, [8 x i8], [3 x double], [8 x i8], [3 x double], [8 x i8], [3 x double] }>, ptr addrspace(7) [[TMP0]], i32 0, i32 8 +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds {{i8|<{ double, double, double, [[]8 x i8], [[]3 x double], [[]8 x i8], [[]3 x double], [[]8 x i8], [[]3 x double] }>}}, ptr addrspace(7) [[TMP0]], i32 {{96|0, i32 8}} // CHECK-NEXT: [[TMP13:%.*]] = load <3 x double>, ptr addrspace(7) [[TMP12]], align 32 // CHECK-NEXT: [[TMP14:%.*]] = call reassoc nnan nsz arcp contract <3 x double> (...) 
@lgc.create.fma.v3f64(<3 x double> [[TMP9]], <3 x double> [[TMP11]], <3 x double> [[TMP13]]) // CHECK-NEXT: [[D3_0_0_VEC_EXTRACT:%.*]] = extractelement <3 x double> [[TMP14]], i64 0 diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestFmaFloat_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestFmaFloat_lit.frag index b4bfb876b2..015a965534 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestFmaFloat_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestFmaFloat_lit.frag @@ -23,16 +23,16 @@ void main() // CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(7) @lgc.load.buffer.desc(i64 0, i32 0, i32 0, i32 0) // CHECK-NEXT: [[TMP1:%.*]] = call ptr @llvm.invariant.start.p7(i64 -1, ptr addrspace(7) [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(7) [[TMP0]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds <{ float, float, float, [4 x i8], [3 x float], [4 x i8], [3 x float], [4 x i8], [3 x float] }>, ptr addrspace(7) [[TMP0]], i32 0, i32 1 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds {{i8|<{ float, float, float, [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x float] }>}}, ptr addrspace(7) [[TMP0]], i32 {{4|0, i32 1}} // CHECK-NEXT: [[TMP4:%.*]] = load float, ptr addrspace(7) [[TMP3]], align 4 -// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds <{ float, float, float, [4 x i8], [3 x float], [4 x i8], [3 x float], [4 x i8], [3 x float] }>, ptr addrspace(7) [[TMP0]], i32 0, i32 2 +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds {{i8|<{ float, float, float, [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x float] }>}}, ptr addrspace(7) [[TMP0]], i32 {{8|0, i32 2}} // CHECK-NEXT: [[TMP6:%.*]] = load float, ptr addrspace(7) [[TMP5]], align 4 // CHECK-NEXT: [[TMP7:%.*]] = call reassoc nnan nsz arcp contract afn float (...) @lgc.create.fma.f32(float [[TMP2]], float [[TMP4]], float [[TMP6]]) -// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds <{ float, float, float, [4 x i8], [3 x float], [4 x i8], [3 x float], [4 x i8], [3 x float] }>, ptr addrspace(7) [[TMP0]], i32 0, i32 4 +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds {{i8|<{ float, float, float, [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x float] }>}}, ptr addrspace(7) [[TMP0]], i32 {{16|0, i32 4}} // CHECK-NEXT: [[TMP9:%.*]] = load <3 x float>, ptr addrspace(7) [[TMP8]], align 16 -// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds <{ float, float, float, [4 x i8], [3 x float], [4 x i8], [3 x float], [4 x i8], [3 x float] }>, ptr addrspace(7) [[TMP0]], i32 0, i32 6 +// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds {{i8|<{ float, float, float, [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x float] }>}}, ptr addrspace(7) [[TMP0]], i32 {{32|0, i32 6}} // CHECK-NEXT: [[TMP11:%.*]] = load <3 x float>, ptr addrspace(7) [[TMP10]], align 16 -// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds <{ float, float, float, [4 x i8], [3 x float], [4 x i8], [3 x float], [4 x i8], [3 x float] }>, ptr addrspace(7) [[TMP0]], i32 0, i32 8 +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds {{i8|<{ float, float, float, [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x float] }>}}, ptr addrspace(7) [[TMP0]], i32 {{48|0, i32 8}} // CHECK-NEXT: [[TMP13:%.*]] = load <3 x float>, ptr addrspace(7) [[TMP12]], align 16 // CHECK-NEXT: [[TMP14:%.*]] = call reassoc nnan nsz arcp contract afn <3 x float> (...) 
@lgc.create.fma.v3f32(<3 x float> [[TMP9]], <3 x float> [[TMP11]], <3 x float> [[TMP13]]) // CHECK-NEXT: [[F3_0_0_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[TMP14]], i64 0 diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestMixSelectDouble_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestMixSelectDouble_lit.frag index b57bee8eeb..4cc9302d98 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestMixSelectDouble_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestMixSelectDouble_lit.frag @@ -28,17 +28,17 @@ void main() // CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(7) @lgc.load.buffer.desc(i64 0, i32 0, i32 0, i32 0) // CHECK-NEXT: [[TMP1:%.*]] = call ptr @llvm.invariant.start.p7(i64 -1, ptr addrspace(7) [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = load double, ptr addrspace(7) [[TMP0]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds <{ double, double, i32, [12 x i8], [3 x double], [8 x i8], [3 x double], [8 x i8], [3 x i32] }>, ptr addrspace(7) [[TMP0]], i32 0, i32 1 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds {{i8|<{ double, double, i32, [[]12 x i8], [[]3 x double], [[]8 x i8], [[]3 x double], [[]8 x i8], [[]3 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{8|0, i32 1}} // CHECK-NEXT: [[TMP4:%.*]] = load double, ptr addrspace(7) [[TMP3]], align 8 -// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds <{ double, double, i32, [12 x i8], [3 x double], [8 x i8], [3 x double], [8 x i8], [3 x i32] }>, ptr addrspace(7) [[TMP0]], i32 0, i32 2 +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds {{i8|<{ double, double, i32, [[]12 x i8], [[]3 x double], [[]8 x i8], [[]3 x double], [[]8 x i8], [[]3 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{16|0, i32 2}} // CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(7) [[TMP5]], align 4 // CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP6]], 0 // CHECK-NEXT: [[TMP7:%.*]] = select reassoc nnan nsz arcp contract i1 [[DOTNOT]], double [[TMP2]], double [[TMP4]] -// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds <{ double, double, i32, [12 x i8], [3 x double], [8 x i8], [3 x double], [8 x i8], [3 x i32] }>, ptr addrspace(7) [[TMP0]], i32 0, i32 4 +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds {{i8|<{ double, double, i32, [[]12 x i8], [[]3 x double], [[]8 x i8], [[]3 x double], [[]8 x i8], [[]3 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{32|0, i32 4}} // CHECK-NEXT: [[TMP9:%.*]] = load <3 x double>, ptr addrspace(7) [[TMP8]], align 32 -// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds <{ double, double, i32, [12 x i8], [3 x double], [8 x i8], [3 x double], [8 x i8], [3 x i32] }>, ptr addrspace(7) [[TMP0]], i32 0, i32 6 +// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds {{i8|<{ double, double, i32, [[]12 x i8], [[]3 x double], [[]8 x i8], [[]3 x double], [[]8 x i8], [[]3 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{64|0, i32 6}} // CHECK-NEXT: [[TMP11:%.*]] = load <3 x double>, ptr addrspace(7) [[TMP10]], align 32 -// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds <{ double, double, i32, [12 x i8], [3 x double], [8 x i8], [3 x double], [8 x i8], [3 x i32] }>, ptr addrspace(7) [[TMP0]], i32 0, i32 8 +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds {{i8|<{ double, double, i32, [[]12 x i8], [[]3 x double], [[]8 x i8], [[]3 x double], [[]8 x i8], [[]3 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{96|0, i32 8}} // CHECK-NEXT: [[TMP13:%.*]] = load <3 x i32>, ptr addrspace(7) [[TMP12]], align 16 // CHECK-NEXT: [[TMP14:%.*]] = extractelement <3 x i32> [[TMP13]], i64 1 // CHECK-NEXT: 
[[DOTNOT2:%.*]] = icmp eq i32 [[TMP14]], 0 diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestMixSelectFloat_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestMixSelectFloat_lit.frag index 027ac67a06..6889ff741e 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestMixSelectFloat_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestMixSelectFloat_lit.frag @@ -28,17 +28,17 @@ void main() // CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(7) @lgc.load.buffer.desc(i64 0, i32 0, i32 0, i32 0) // CHECK-NEXT: [[TMP1:%.*]] = call ptr @llvm.invariant.start.p7(i64 -1, ptr addrspace(7) [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(7) [[TMP0]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds <{ float, float, i32, [4 x i8], [3 x float], [4 x i8], [3 x float], [4 x i8], [3 x i32] }>, ptr addrspace(7) [[TMP0]], i32 0, i32 1 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds {{i8|<{ float, float, i32, [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{4|0, i32 1}} // CHECK-NEXT: [[TMP4:%.*]] = load float, ptr addrspace(7) [[TMP3]], align 4 -// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds <{ float, float, i32, [4 x i8], [3 x float], [4 x i8], [3 x float], [4 x i8], [3 x i32] }>, ptr addrspace(7) [[TMP0]], i32 0, i32 2 +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds {{i8|<{ float, float, i32, [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{8|0, i32 2}} // CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(7) [[TMP5]], align 4 // CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP6]], 0 // CHECK-NEXT: [[TMP7:%.*]] = select reassoc nnan nsz arcp contract afn i1 [[DOTNOT]], float [[TMP2]], float [[TMP4]] -// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds <{ float, float, i32, [4 x i8], [3 x float], [4 x i8], [3 x float], [4 x i8], [3 x i32] }>, ptr addrspace(7) [[TMP0]], i32 0, i32 4 +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds {{i8|<{ float, float, i32, [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{16|0, i32 4}} // CHECK-NEXT: [[TMP9:%.*]] = load <3 x float>, ptr addrspace(7) [[TMP8]], align 16 -// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds <{ float, float, i32, [4 x i8], [3 x float], [4 x i8], [3 x float], [4 x i8], [3 x i32] }>, ptr addrspace(7) [[TMP0]], i32 0, i32 6 +// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds {{i8|<{ float, float, i32, [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{32|0, i32 6}} // CHECK-NEXT: [[TMP11:%.*]] = load <3 x float>, ptr addrspace(7) [[TMP10]], align 16 -// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds <{ float, float, i32, [4 x i8], [3 x float], [4 x i8], [3 x float], [4 x i8], [3 x i32] }>, ptr addrspace(7) [[TMP0]], i32 0, i32 8 +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds {{i8|<{ float, float, i32, [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{48|0, i32 8}} // CHECK-NEXT: [[TMP13:%.*]] = load <3 x i32>, ptr addrspace(7) [[TMP12]], align 16 // CHECK-NEXT: [[TMP14:%.*]] = extractelement <3 x i32> [[TMP13]], i64 1 // CHECK-NEXT: [[DOTNOT2:%.*]] = icmp eq i32 [[TMP14]], 0 diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestMixSelectInt_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestMixSelectInt_lit.frag index 
cdac65ca1e..8108e0fbf5 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestMixSelectInt_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestMixSelectInt_lit.frag @@ -28,17 +28,17 @@ void main() // CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(7) @lgc.load.buffer.desc(i64 0, i32 0, i32 0, i32 0) // CHECK-NEXT: [[TMP1:%.*]] = call ptr @llvm.invariant.start.p7(i64 -1, ptr addrspace(7) [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(7) [[TMP0]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds <{ i32, i32, i32, [4 x i8], [3 x i32], [4 x i8], [3 x i32], [4 x i8], [3 x i32] }>, ptr addrspace(7) [[TMP0]], i32 0, i32 1 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds {{i8|<{ i32, i32, i32, [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{4|0, i32 1}} // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(7) [[TMP3]], align 4 -// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds <{ i32, i32, i32, [4 x i8], [3 x i32], [4 x i8], [3 x i32], [4 x i8], [3 x i32] }>, ptr addrspace(7) [[TMP0]], i32 0, i32 2 +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds {{i8|<{ i32, i32, i32, [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{8|0, i32 2}} // CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(7) [[TMP5]], align 4 // CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP6]], 0 // CHECK-NEXT: [[TMP7:%.*]] = select i1 [[DOTNOT]], i32 [[TMP2]], i32 [[TMP4]] -// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds <{ i32, i32, i32, [4 x i8], [3 x i32], [4 x i8], [3 x i32], [4 x i8], [3 x i32] }>, ptr addrspace(7) [[TMP0]], i32 0, i32 4 +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds {{i8|<{ i32, i32, i32, [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{16|0, i32 4}} // CHECK-NEXT: [[TMP9:%.*]] = load <3 x i32>, ptr addrspace(7) [[TMP8]], align 16 -// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds <{ i32, i32, i32, [4 x i8], [3 x i32], [4 x i8], [3 x i32], [4 x i8], [3 x i32] }>, ptr addrspace(7) [[TMP0]], i32 0, i32 6 +// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds {{i8|<{ i32, i32, i32, [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{32|0, i32 6}} // CHECK-NEXT: [[TMP11:%.*]] = load <3 x i32>, ptr addrspace(7) [[TMP10]], align 16 -// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds <{ i32, i32, i32, [4 x i8], [3 x i32], [4 x i8], [3 x i32], [4 x i8], [3 x i32] }>, ptr addrspace(7) [[TMP0]], i32 0, i32 8 +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds {{i8|<{ i32, i32, i32, [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{48|0, i32 8}} // CHECK-NEXT: [[TMP13:%.*]] = load <3 x i32>, ptr addrspace(7) [[TMP12]], align 16 // CHECK-NEXT: [[TMP14:%.*]] = extractelement <3 x i32> [[TMP13]], i64 1 // CHECK-NEXT: [[DOTNOT2:%.*]] = icmp eq i32 [[TMP14]], 0 diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestMixSelectUint_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestMixSelectUint_lit.frag index 39dbce2bcf..461ebc24c6 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestMixSelectUint_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestMixSelectUint_lit.frag @@ -28,17 +28,17 @@ void main() // CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(7) @lgc.load.buffer.desc(i64 0, i32 0, i32 0, i32 0) // CHECK-NEXT: [[TMP1:%.*]] = 
call ptr @llvm.invariant.start.p7(i64 -1, ptr addrspace(7) [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(7) [[TMP0]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds <{ i32, i32, i32, [4 x i8], [3 x i32], [4 x i8], [3 x i32], [4 x i8], [3 x i32] }>, ptr addrspace(7) [[TMP0]], i32 0, i32 1 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds {{i8|<{ i32, i32, i32, [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{4|0, i32 1}} // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(7) [[TMP3]], align 4 -// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds <{ i32, i32, i32, [4 x i8], [3 x i32], [4 x i8], [3 x i32], [4 x i8], [3 x i32] }>, ptr addrspace(7) [[TMP0]], i32 0, i32 2 +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds {{i8|<{ i32, i32, i32, [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{8|0, i32 2}} // CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(7) [[TMP5]], align 4 // CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP6]], 0 // CHECK-NEXT: [[TMP7:%.*]] = select i1 [[DOTNOT]], i32 [[TMP2]], i32 [[TMP4]] -// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds <{ i32, i32, i32, [4 x i8], [3 x i32], [4 x i8], [3 x i32], [4 x i8], [3 x i32] }>, ptr addrspace(7) [[TMP0]], i32 0, i32 4 +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds {{i8|<{ i32, i32, i32, [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{16|0, i32 4}} // CHECK-NEXT: [[TMP9:%.*]] = load <3 x i32>, ptr addrspace(7) [[TMP8]], align 16 -// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds <{ i32, i32, i32, [4 x i8], [3 x i32], [4 x i8], [3 x i32], [4 x i8], [3 x i32] }>, ptr addrspace(7) [[TMP0]], i32 0, i32 6 +// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds {{i8|<{ i32, i32, i32, [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{32|0, i32 6}} // CHECK-NEXT: [[TMP11:%.*]] = load <3 x i32>, ptr addrspace(7) [[TMP10]], align 16 -// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds <{ i32, i32, i32, [4 x i8], [3 x i32], [4 x i8], [3 x i32], [4 x i8], [3 x i32] }>, ptr addrspace(7) [[TMP0]], i32 0, i32 8 +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds {{i8|<{ i32, i32, i32, [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{48|0, i32 8}} // CHECK-NEXT: [[TMP13:%.*]] = load <3 x i32>, ptr addrspace(7) [[TMP12]], align 16 // CHECK-NEXT: [[TMP14:%.*]] = extractelement <3 x i32> [[TMP13]], i64 1 // CHECK-NEXT: [[DOTNOT2:%.*]] = icmp eq i32 [[TMP14]], 0 diff --git a/llpc/test/shaderdb/extensions/PipelineVsFs_TestFetchSingleInput.pipe b/llpc/test/shaderdb/extensions/PipelineVsFs_TestFetchSingleInput.pipe deleted file mode 100644 index e4d2e4dbc3..0000000000 --- a/llpc/test/shaderdb/extensions/PipelineVsFs_TestFetchSingleInput.pipe +++ /dev/null @@ -1,278 +0,0 @@ -; Test that a fetch shader for 1 input is handled correctly. - -; BEGIN_SHADERTEST -; RUN: amdllpc -use-relocatable-shader-elf -auto-layout-desc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s -; Skip to the patching results for the fetch shader -; SHADERTEST-LABEL: LLPC pipeline patching results -; Check the inputs to the vertex shader. This should be all of the regular inputs. There is one vertex attribute being passed in: The vector at the end. 
-; SHADERTEST: define dllexport amdgpu_vs void @_amdgpu_vs_main_fetchless(i32 inreg noundef %globalTable, i32 inreg noundef %userdata0, i32 inreg noundef %vertexBufferTable, i32 inreg noundef %baseVertex, i32 inreg noundef %baseInstance, i32 noundef %VertexId, i32 noundef %RelVertexId, i32 noundef %PrimitiveId, i32 noundef %InstanceId, <4 x float> noundef %vertex0.0) -; SHADERTEST-LABEL: LGC glue shader results -; Check the inputs to the fetch shader. This should match the vertex shader except: -; - there are extra inreg inputs because its determination of how many SGPR inputs -; are conservative; -; - there is no VGPR input for the vertex input that the fetch shader generates. -; SHADERTEST: define amdgpu_vs { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, <4 x float> } @_amdgpu_vs_main(i32 inreg noundef %0, i32 inreg noundef %1, i32 inreg noundef %VertexBufferTable, i32 inreg noundef %BaseVertex, i32 inreg noundef %BaseInstance, i32 inreg noundef %2, i32 inreg noundef %3, i32 inreg noundef %4, i32 inreg noundef %5, i32 inreg noundef %6, i32 inreg noundef %7, i32 inreg noundef %8, i32 inreg noundef %9, i32 inreg noundef %10, i32 inreg noundef %11, float noundef %VertexId, float noundef %12, float noundef %13, float noundef %InstanceId) -; Check that the attribute is loaded. -; SHADERTEST: [[f0:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32> [[addr:%[0-9]*]], i32 %VertexIndex, i32 0, i32 0, i32 22, i32 0) -; SHADERTEST: [[f1:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32> [[addr:%[0-9]*]], i32 %VertexIndex, i32 4, i32 0, i32 22, i32 0) -; SHADERTEST: [[f2:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32> [[addr:%[0-9]*]], i32 %VertexIndex, i32 8, i32 0, i32 22, i32 0) -; SHADERTEST: [[vectmp0:%.*]] = insertelement <4 x i32> poison, i32 [[f0]], i{{32|64}} 0 -; SHADERTEST: [[vectmp1:%.*]] = insertelement <4 x i32> [[vectmp0]], i32 [[f1]], i{{32|64}} 1 -; SHADERTEST: [[vecf:%.*]] = insertelement <4 x i32> [[vectmp1]], i32 [[f2]], i{{32|64}} 2 -; Check that the attribute is cast to float so that it will be placed in a VGPR -; SHADERTEST: [[vecCast:%.*]] = bitcast <4 x i32> [[vecf]] to <4 x float> -; Check that the attribute is inserted into the return value, and returned. -; SHADERTEST: %vertex0.0 = insertelement <4 x float> [[vecCast]], float 1.000000e+00, i64 3 -; SHADERTEST: [[retval:%.*]] = insertvalue {{.*}}, <4 x float> %vertex0.0 -; SHADERTEST: ret {{.*}} [[retval]] -; END_SHADERTEST - -; BEGIN_SHADERTEST -; Check that the fetch shader loads the inputs into the correct registers. -; RUN: amdllpc -use-relocatable-shader-elf -o %t.elf %gfxip %s && llvm-objdump --triple=amdgcn --mcpu=gfx1010 -d %t.elf | FileCheck -check-prefix=SHADERTEST2 %s -; SHADERTEST2: Disassembly of section .text: -; SHADERTEST2: 0000000000000000 <_amdgpu_vs_main> -; SHADERTEST2-DAG: v_mov_b32_e32 v7, 1.0 -; SHADERTEST2-DAG: tbuffer_load_format_xyz v[4:6], v{{[0-9]*}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 format:[BUF_FMT_32_32_32_FLOAT] idxen -; Identify the start of the vertex shader -; SHADERTEST2: <_amdgpu_vs_main_fetchless>: -; SHADERTEST2: s_getpc_b64 s[{{[0-9]+}}:{{[0-9]+}}] -; Identify the start of the fragment shader, and check its alignment at the same time. 
-; SHADERTEST2: 00 <_amdgpu_ps_main> -; END_SHADERTEST - - - -[Version] -version = 40 - -[VsSpirv] - OpCapability Shader - %1 = OpExtInstImport "GLSL.std.450" - OpMemoryModel Logical GLSL450 - OpEntryPoint Vertex %main "main" %_ %pos - OpSource GLSL 450 - OpName %main "main" - OpName %UBO "UBO" - OpMemberName %UBO 0 "projection" - OpMemberName %UBO 1 "model" - OpMemberName %UBO 2 "gradientPos" - OpName %ubo "ubo" - OpName %gl_PerVertex "gl_PerVertex" - OpMemberName %gl_PerVertex 0 "gl_Position" - OpName %_ "" - OpName %pos "pos" - OpMemberDecorate %UBO 0 ColMajor - OpMemberDecorate %UBO 0 Offset 0 - OpMemberDecorate %UBO 0 MatrixStride 16 - OpMemberDecorate %UBO 1 ColMajor - OpMemberDecorate %UBO 1 Offset 64 - OpMemberDecorate %UBO 1 MatrixStride 16 - OpMemberDecorate %UBO 2 Offset 128 - OpDecorate %UBO Block - OpDecorate %ubo DescriptorSet 0 - OpDecorate %ubo Binding 0 - OpMemberDecorate %gl_PerVertex 0 BuiltIn Position - OpDecorate %gl_PerVertex Block - OpDecorate %pos Location 0 - %void = OpTypeVoid - %9 = OpTypeFunction %void - %float = OpTypeFloat 32 - %v4float = OpTypeVector %float 4 -%mat4v4float = OpTypeMatrix %v4float 4 - %UBO = OpTypeStruct %mat4v4float %mat4v4float %float -%_ptr_Uniform_UBO = OpTypePointer Uniform %UBO - %ubo = OpVariable %_ptr_Uniform_UBO Uniform - %int = OpTypeInt 32 1 -%gl_PerVertex = OpTypeStruct %v4float -%_ptr_Output_gl_PerVertex = OpTypePointer Output %gl_PerVertex - %_ = OpVariable %_ptr_Output_gl_PerVertex Output - %int_0 = OpConstant %int 0 -%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float -%_ptr_Input_v4float = OpTypePointer Input %v4float - %pos = OpVariable %_ptr_Input_v4float Input -%_ptr_Output_v4float = OpTypePointer Output %v4float - %float_1 = OpConstant %float 1 - %main = OpFunction %void None %9 - %21 = OpLabel - %22 = OpAccessChain %_ptr_Uniform_mat4v4float %ubo %int_0 - %23 = OpLoad %mat4v4float %22 - %24 = OpLoad %v4float %pos - %25 = OpCompositeInsert %v4float %float_1 %24 3 - %26 = OpMatrixTimesVector %v4float %23 %25 - %27 = OpAccessChain %_ptr_Output_v4float %_ %int_0 - OpStore %27 %26 - OpReturn - OpFunctionEnd - -[VsInfo] -entryPoint = main -userDataNode[0].type = DescriptorTableVaPtr -userDataNode[0].offsetInDwords = 0 -userDataNode[0].sizeInDwords = 1 -userDataNode[0].next[0].type = DescriptorBuffer -userDataNode[0].next[0].offsetInDwords = 0 -userDataNode[0].next[0].sizeInDwords = 4 -userDataNode[0].next[0].set = 0 -userDataNode[0].next[0].binding = 0 -userDataNode[0].next[1].type = DescriptorCombinedTexture -userDataNode[0].next[1].offsetInDwords = 4 -userDataNode[0].next[1].sizeInDwords = 12 -userDataNode[0].next[1].set = 0 -userDataNode[0].next[1].binding = 1 -userDataNode[0].next[2].type = DescriptorBuffer -userDataNode[0].next[2].offsetInDwords = 16 -userDataNode[0].next[2].sizeInDwords = 4 -userDataNode[0].next[2].set = 0 -userDataNode[0].next[2].binding = 2 -userDataNode[1].type = IndirectUserDataVaPtr -userDataNode[1].offsetInDwords = 1 -userDataNode[1].sizeInDwords = 1 -userDataNode[1].indirectUserDataCount = 4 - -options.trapPresent = 0 -options.debugMode = 0 -options.enablePerformanceData = 0 -options.allowReZ = 0 -options.vgprLimit = 0 -options.sgprLimit = 0 -options.maxThreadGroupsPerComputeUnit = 0 -options.waveSize = 0 -options.wgpMode = 0 -options.forceLoopUnrollCount = 0 -options.useSiScheduler = 0 -options.allowVaryWaveSize = 0 -options.enableLoadScalarizer = 0 -options.disableLicm = 0 -options.unrollThreshold = 0 -options.scalarThreshold = 0 - -[FsSpirv] - OpCapability Shader - %1 = OpExtInstImport 
"GLSL.std.450" - OpMemoryModel Logical GLSL450 - OpEntryPoint Fragment %main "main" %outFragColor - OpExecutionMode %main OriginUpperLeft - OpSource GLSL 450 - OpName %main "main" - OpName %outFragColor "outFragColor" - OpDecorate %outFragColor Location 0 - %void = OpTypeVoid - %5 = OpTypeFunction %void - %float = OpTypeFloat 32 - %v4float = OpTypeVector %float 4 -%_ptr_Output_v4float = OpTypePointer Output %v4float -%outFragColor = OpVariable %_ptr_Output_v4float Output - %float_1 = OpConstant %float 1 - %float_0 = OpConstant %float 0 - %11 = OpConstantComposite %v4float %float_0 %float_1 %float_0 %float_1 - %main = OpFunction %void None %5 - %12 = OpLabel - OpStore %outFragColor %11 - OpReturn - OpFunctionEnd - -[FsInfo] -entryPoint = main -userDataNode[0].type = DescriptorTableVaPtr -userDataNode[0].offsetInDwords = 0 -userDataNode[0].sizeInDwords = 1 -userDataNode[0].next[0].type = DescriptorBuffer -userDataNode[0].next[0].offsetInDwords = 0 -userDataNode[0].next[0].sizeInDwords = 4 -userDataNode[0].next[0].set = 0 -userDataNode[0].next[0].binding = 0 -userDataNode[0].next[1].type = DescriptorCombinedTexture -userDataNode[0].next[1].offsetInDwords = 4 -userDataNode[0].next[1].sizeInDwords = 12 -userDataNode[0].next[1].set = 0 -userDataNode[0].next[1].binding = 1 -userDataNode[0].next[2].type = DescriptorBuffer -userDataNode[0].next[2].offsetInDwords = 16 -userDataNode[0].next[2].sizeInDwords = 4 -userDataNode[0].next[2].set = 0 -userDataNode[0].next[2].binding = 2 - -options.trapPresent = 0 -options.debugMode = 0 -options.enablePerformanceData = 0 -options.allowReZ = 0 -options.vgprLimit = 0 -options.sgprLimit = 0 -options.maxThreadGroupsPerComputeUnit = 0 -options.waveSize = 0 -options.wgpMode = 0 -options.forceLoopUnrollCount = 0 -options.useSiScheduler = 0 -options.allowVaryWaveSize = 0 -options.enableLoadScalarizer = 0 -options.disableLicm = 0 -options.unrollThreshold = 0 -options.scalarThreshold = 0 - -[GraphicsPipelineState] -topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST -patchControlPoints = 0 -deviceIndex = 0 -disableVertexReuse = 0 -switchWinding = 0 -enableMultiView = 0 -depthClipEnable = 1 -rasterizerDiscardEnable = 0 -perSampleShading = 0 -numSamples = 1 -samplePatternIdx = 0 -usrClipPlaneMask = 0 -polygonMode = VK_POLYGON_MODE_FILL -cullMode = VK_CULL_MODE_NONE -frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE -depthBiasEnable = 0 -alphaToCoverageEnable = 0 -dualSourceBlendEnable = 0 -colorBuffer[0].format = VK_FORMAT_B8G8R8A8_UNORM -colorBuffer[0].channelWriteMask = 15 -colorBuffer[0].blendEnable = 0 -colorBuffer[0].blendSrcAlphaToColor = 1 -nggState.enableNgg = 0 -nggState.enableGsUse = 0 -nggState.forceNonPassthrough = 0 -nggState.alwaysUsePrimShaderTable = 0 -nggState.enableFastLaunch = 0 -nggState.enableVertexReuse = 0 -nggState.enableBackfaceCulling = 0 -nggState.enableFrustumCulling = 0 -nggState.enableBoxFilterCulling = 0 -nggState.enableSphereCulling = 0 -nggState.enableSmallPrimFilter = 0 -nggState.enableCullDistanceCulling = 0 -nggState.backfaceExponent = 0 -nggState.subgroupSizing = Auto -nggState.primsPerSubgroup = 0 -nggState.vertsPerSubgroup = 0 -options.includeDisassembly = 0 -options.scalarBlockLayout = 0 -options.includeIr = 0 -options.robustBufferAccess = 0 -options.reconfigWorkgroupLayout = 0 - - -[VertexInputState] -binding[0].binding = 0 -binding[0].stride = 44 -binding[0].inputRate = VK_VERTEX_INPUT_RATE_VERTEX -attribute[0].location = 0 -attribute[0].binding = 0 -attribute[0].format = VK_FORMAT_R32G32B32_SFLOAT -attribute[0].offset = 0 
-attribute[1].location = 1 -attribute[1].binding = 0 -attribute[1].format = VK_FORMAT_R32G32_SFLOAT -attribute[1].offset = 12 -attribute[2].location = 2 -attribute[2].binding = 0 -attribute[2].format = VK_FORMAT_R32G32B32_SFLOAT -attribute[2].offset = 20 -attribute[3].location = 3 -attribute[3].binding = 0 -attribute[3].format = VK_FORMAT_R32G32B32_SFLOAT -attribute[3].offset = 32 diff --git a/llpc/test/shaderdb/general/PipelineVsFs_TestUberShader.pipe b/llpc/test/shaderdb/general/PipelineVsFs_TestUberShader.pipe index 17704c8b38..0cdd2055b8 100644 --- a/llpc/test/shaderdb/general/PipelineVsFs_TestUberShader.pipe +++ b/llpc/test/shaderdb/general/PipelineVsFs_TestUberShader.pipe @@ -9,7 +9,7 @@ ; SHADERTEST: [[PTR1:%[.a-z0-9]+]] = insertelement <2 x i32> [[PTR0]], i32 %userdata3, i64 1 ; SHADERTEST: [[PTR2:%[.a-z0-9]+]] = bitcast <2 x i32> [[PTR1]] to i64 ; SHADERTEST: [[INTDESCPTR:%[0-9]+]] = inttoptr i64 [[PTR2]] to ptr addrspace(4) -; SHADERTEST: [[CURDESCPTR:%[0-9]+]] = getelementptr i64, ptr addrspace(4) [[INTDESCPTR]], i64 1 +; SHADERTEST: [[CURDESCPTR:%[0-9]+]] = getelementptr {{i8|i64}}, ptr addrspace(4) [[INTDESCPTR]], i64 {{8|1}} ; SHADERTEST: [[UBERINFO:%[0-9]*]] = load <4 x i32>, ptr addrspace(4) [[CURDESCPTR]], align 16 ; Load vertex diff --git a/llpc/test/shaderdb/general/UndefVertexOutput.spvasm b/llpc/test/shaderdb/general/UndefVertexOutput.spvasm index 8bcd0cb765..83197a426f 100644 --- a/llpc/test/shaderdb/general/UndefVertexOutput.spvasm +++ b/llpc/test/shaderdb/general/UndefVertexOutput.spvasm @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py -; RUN: amdllpc -o - -filetype=asm -gfxip 9.0 %s | FileCheck -check-prefixes=CHECK %s +; RUN: amdllpc -o - -filetype=asm -gfxip 10.1 %s | FileCheck -check-prefixes=CHECK %s ; Make sure that the export channels match the location in the spir-v. ; This is how the PS knows which channel from which to read the value. 
@@ -58,19 +58,43 @@ OpStore %11 %float_0 OpReturn OpFunctionEnd -; CHECK-LABEL: amdgpu_vs_main: -; CHECK: s_getpc_b64 s[4:5] -; CHECK-NEXT: s_mov_b32 s0, s1 -; CHECK-NEXT: s_mov_b32 s1, s5 -; CHECK-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 -; CHECK-NEXT: v_add_u32_e32 v0, s2, v0 +; CHECK-LABEL: amdgpu_gs_main: +; CHECK: s_mov_b32 exec_lo, -1 +; CHECK-NEXT: s_bfe_u32 s3, s3, 0x40018 +; CHECK-NEXT: s_bfe_u32 s4, s2, 0x90016 +; CHECK-NEXT: s_bfe_u32 s1, s2, 0x9000c +; CHECK-NEXT: s_mov_b32 s0, s9 +; CHECK-NEXT: s_cmp_lg_u32 s3, 0 +; CHECK-NEXT: s_barrier +; CHECK-NEXT: s_cbranch_scc1 .LBB0_2 +; CHECK-NEXT: s_lshl_b32 s2, s4, 12 +; CHECK-NEXT: s_or_b32 m0, s2, s1 +; CHECK-NEXT: s_sendmsg sendmsg(MSG_GS_ALLOC_REQ) +; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: v_mbcnt_lo_u32_b32 v1, -1, 0 +; CHECK-NEXT: v_lshl_or_b32 v1, s3, 5, v1 +; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, s4, v1 +; CHECK-NEXT: s_and_saveexec_b32 s2, vcc_lo +; CHECK-NEXT: s_cbranch_execz .LBB0_4 +; CHECK-NEXT: exp prim v0, off, off, off done +; CHECK-NEXT: .LBB0_4: +; CHECK-NEXT: s_waitcnt expcnt(0) +; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s2 +; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, s1, v1 +; CHECK-NEXT: s_and_saveexec_b32 s1, vcc_lo +; CHECK-NEXT: s_cbranch_execz .LBB0_6 +; CHECK-NEXT: s_getpc_b64 s[2:3] +; CHECK-NEXT: v_add_nc_u32_e32 v0, s10, v5 +; CHECK-NEXT: s_mov_b32 s1, s3 ; CHECK-NEXT: v_mov_b32_e32 v4, 1.0 +; CHECK-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x10 ; CHECK-NEXT: v_mov_b32_e32 v5, 0 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[4:7], 0 format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_FLOAT] idxen +; CHECK-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: exp pos0 v0, v1, v2, v3 done ; CHECK-NEXT: exp param7 v5, v5, v4, v4 ; CHECK-NEXT: exp param9 off, v5, off, off ; CHECK-NEXT: exp param8 v4, v5, v5, v4 +; CHECK-NEXT: .LBB0_6: ; CHECK-NEXT: s_endpgm diff --git a/llpc/test/shaderdb/gfx11/TessFactorStoreWithOpt.pipe b/llpc/test/shaderdb/gfx11/TessFactorStoreWithOpt.pipe index 557fcf3307..28cc405ca6 100644 --- a/llpc/test/shaderdb/gfx11/TessFactorStoreWithOpt.pipe +++ b/llpc/test/shaderdb/gfx11/TessFactorStoreWithOpt.pipe @@ -130,7 +130,7 @@ ; SHADERTEST-NEXT: br label %.endTryStoreTf ; SHADERTEST-LABEL: .storeTf: -; SHADERTEST: %tfBufferDescPtr = getelementptr <4 x i32>, ptr addrspace(4) %globalTablePtr, i64 9 +; SHADERTEST: %tfBufferDescPtr = getelementptr {{i8|<4 x i32>}}, ptr addrspace(4) %globalTablePtr, i64 {{144|9}} ; SHADERTEST-NEXT: %tfBufferDesc = load <4 x i32>, ptr addrspace(4) %tfBufferDescPtr, align 16 ; SHADERTEST-NEXT: %[[OUTER_TF_OFFSET:[^ ,]*]] = mul i32 %threadIdInGroup, 24 ; SHADERTEST-NEXT: call void @llvm.amdgcn.raw.tbuffer.store.v4f32(<4 x float> %outerTf, <4 x i32> %tfBufferDesc, i32 %[[OUTER_TF_OFFSET]], i32 %tfBufferBase, i32 63, i32 1) diff --git a/llpc/test/shaderdb/multiple_inputs/test_inputs/PipelineVsFs_ConstantData_Vs2Fs1.pipe b/llpc/test/shaderdb/multiple_inputs/test_inputs/PipelineVsFs_ConstantData_Vs2Fs1.pipe index 240c8b3ffc..d023d321f7 100644 --- a/llpc/test/shaderdb/multiple_inputs/test_inputs/PipelineVsFs_ConstantData_Vs2Fs1.pipe +++ b/llpc/test/shaderdb/multiple_inputs/test_inputs/PipelineVsFs_ConstantData_Vs2Fs1.pipe @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --function _amdgpu_ps_main ; Test that constant data in the fragment shader is handled correctly. 
-; RUN: amdllpc -v -gfxip 9.0.0 -enable-relocatable-shader-elf %s | FileCheck -check-prefix=SHADERTEST %s -; RUN: amdllpc -v -gfxip 9.0.0 -enable-part-pipeline=0 %s | FileCheck -check-prefix=SHADERTEST2_PP0 %s -; RUN: amdllpc -v -gfxip 9.0.0 -enable-part-pipeline=1 %s | FileCheck -check-prefix=SHADERTEST2_PP1 %s +; RUN: amdllpc -v -gfxip 10.1.0 -enable-relocatable-shader-elf %s | FileCheck -check-prefix=SHADERTEST %s +; RUN: amdllpc -v -gfxip 10.1.0 -enable-part-pipeline=0 %s | FileCheck -check-prefix=SHADERTEST2_PP0 %s +; RUN: amdllpc -v -gfxip 10.1.0 -enable-part-pipeline=1 %s | FileCheck -check-prefix=SHADERTEST2_PP1 %s [Version] version = 40 diff --git a/llpc/test/shaderdb/object/ObjPushConst_TestNestedStruct_lit.vert b/llpc/test/shaderdb/object/ObjPushConst_TestNestedStruct_lit.vert index c0c8cc0702..bf94763ca2 100644 --- a/llpc/test/shaderdb/object/ObjPushConst_TestNestedStruct_lit.vert +++ b/llpc/test/shaderdb/object/ObjPushConst_TestNestedStruct_lit.vert @@ -22,7 +22,7 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC.*}} SPIR-V lowering results ; SHADERTEST: [[V0:%.*]] = call {{.*}} @lgc.create.load.push.constants.ptr -; SHADERTEST: [[V1:%.*]] = getelementptr {{.*}} addrspace(4) [[V0]], i64 0, i32 1, i32 1 +; SHADERTEST: [[V1:%.*]] = getelementptr {{.*}} addrspace(4) [[V0]], i64 {{8|0, i32 1, i32 1}} ; SHADERTEST: load float, ptr addrspace(4) [[V1]], align 4 diff --git a/llpc/test/shaderdb/object/ObjPushConst_TestSpillToMemory_lit.vert b/llpc/test/shaderdb/object/ObjPushConst_TestSpillToMemory_lit.vert index 5dc55878d6..71fb771f2d 100644 --- a/llpc/test/shaderdb/object/ObjPushConst_TestSpillToMemory_lit.vert +++ b/llpc/test/shaderdb/object/ObjPushConst_TestSpillToMemory_lit.vert @@ -17,7 +17,7 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC.*}} SPIR-V lowering results ; SHADERTEST: [[V0:%.*]] = call {{.*}} @lgc.create.load.push.constants.ptr -; SHADERTEST: [[V1:%.*]] = getelementptr {{.*}} addrspace(4) [[V0]], i64 0, i32 1, i64 8 +; SHADERTEST: [[V1:%.*]] = getelementptr {{.*}} addrspace(4) [[V0]], i64 {{144|0, i32 1, i64 8}} ; SHADERTEST: load <4 x float>, ptr addrspace(4) [[V1]], align 16 ; SHADERTEST: AMDLLPC SUCCESS diff --git a/llpc/test/shaderdb/object/ObjPushConstant_TestBasic_lit.frag b/llpc/test/shaderdb/object/ObjPushConstant_TestBasic_lit.frag index e570d07f15..21f3f3c362 100644 --- a/llpc/test/shaderdb/object/ObjPushConstant_TestBasic_lit.frag +++ b/llpc/test/shaderdb/object/ObjPushConstant_TestBasic_lit.frag @@ -35,9 +35,9 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC.*}} SPIR-V lowering results ; SHADERTEST: [[V0:%.*]] = call {{.*}} @lgc.create.load.push.constants.ptr -; SHADERTEST: [[V1:%.*]] = getelementptr {{.*}} addrspace(4) [[V0]], i64 0, i32 4 +; SHADERTEST: [[V1:%.*]] = getelementptr {{.*}} addrspace(4) [[V0]], i64 {{64|0, i32 4}} ; SHADERTEST: load <4 x float>, ptr addrspace(4) [[V1]], align 16 -; SHADERTEST: [[V11:%.*]] = getelementptr {{.*}} addrspace(4) [[V0]], i64 0, i32 4 +; SHADERTEST: [[V11:%.*]] = getelementptr {{.*}} addrspace(4) [[V0]], i64 {{64|0, i32 4}} ; SHADERTEST: load <4 x float>, ptr addrspace(4) [[V11]], align 16 ; SHADERTEST: AMDLLPC SUCCESS diff --git a/llpc/test/shaderdb/ray_tracing/PipelineRays_TestLaunchKernel.pipe b/llpc/test/shaderdb/ray_tracing/PipelineRays_TestLaunchKernel.pipe new file mode 100644 index 0000000000..6ef532df23 --- 
/dev/null +++ b/llpc/test/shaderdb/ray_tracing/PipelineRays_TestLaunchKernel.pipe @@ -0,0 +1,188 @@ +; RUN: amdllpc -o - -print-after=prepare-continuations -gpurt-use-dumped=true -llpc-raytracing-mode=continuations -emit-lgc %s | FileCheck -check-prefixes=CHECK %s + +[Version] +version = 70 + +[rgenGlsl] +#version 460 +#extension GL_EXT_ray_tracing : enable + +void main() +{ +} + +[rgenInfo] +entryPoint = main + +[ResourceMapping] +userDataNode[0].visibility = 16128 +userDataNode[0].type = DescriptorTableVaPtr +userDataNode[0].offsetInDwords = 8 +userDataNode[0].sizeInDwords = 1 +userDataNode[0].next[0].type = DescriptorConstBufferCompact +userDataNode[0].next[0].offsetInDwords = 0 +userDataNode[0].next[0].sizeInDwords = 2 +userDataNode[0].next[0].set = 0x0000005D +userDataNode[0].next[0].binding = 17 +userDataNode[0].next[0].strideInDwords = 0 +userDataNode[0].next[1].type = DescriptorConstBuffer +userDataNode[0].next[1].offsetInDwords = 2 +userDataNode[0].next[1].sizeInDwords = 4 +userDataNode[0].next[1].set = 0x0000005D +userDataNode[0].next[1].binding = 0 +userDataNode[0].next[1].strideInDwords = 0 +userDataNode[0].next[2].type = DescriptorBuffer +userDataNode[0].next[2].offsetInDwords = 6 +userDataNode[0].next[2].sizeInDwords = 4 +userDataNode[0].next[2].set = 0x0000005D +userDataNode[0].next[2].binding = 1 +userDataNode[0].next[2].strideInDwords = 0 +userDataNode[1].visibility = 2 +userDataNode[1].type = StreamOutTableVaPtr +userDataNode[1].offsetInDwords = 3 +userDataNode[1].sizeInDwords = 1 +userDataNode[2].visibility = 16128 +userDataNode[2].type = DescriptorTableVaPtr +userDataNode[2].offsetInDwords = 7 +userDataNode[2].sizeInDwords = 1 +userDataNode[2].next[0].type = DescriptorImage +userDataNode[2].next[0].offsetInDwords = 0 +userDataNode[2].next[0].sizeInDwords = 8 +userDataNode[2].next[0].set = 0x00000000 +userDataNode[2].next[0].binding = 0 +userDataNode[2].next[0].strideInDwords = 0 +userDataNode[2].next[1].type = DescriptorConstBuffer +userDataNode[2].next[1].offsetInDwords = 8 +userDataNode[2].next[1].sizeInDwords = 4 +userDataNode[2].next[1].set = 0x00000000 +userDataNode[2].next[1].binding = 1 +userDataNode[2].next[1].strideInDwords = 0 + +[RayTracingPipelineState] +deviceIndex = 0 +options.includeDisassembly = 0 +options.scalarBlockLayout = 1 +options.resourceLayoutScheme = Compact +options.includeIr = 0 +options.robustBufferAccess = 0 +options.reconfigWorkgroupLayout = 0 +options.forceCsThreadIdSwizzling = 0 +options.overrideThreadGroupSizeX = 0 +options.overrideThreadGroupSizeY = 0 +options.overrideThreadGroupSizeZ = 0 +options.shadowDescriptorTableUsage = Disable +options.shadowDescriptorTablePtrHigh = 0 +options.extendedRobustness.robustBufferAccess = 0 +options.extendedRobustness.robustImageAccess = 1 +options.extendedRobustness.nullDescriptor = 0 +options.optimizeTessFactor = 1 +options.optimizationLevel = 2 +options.threadGroupSwizzleMode = Default +options.reverseThreadGroup = 0 +options.enableImplicitInvariantExports = 1 +options.internalRtShaders = 0 +options.forceNonUniformResourceIndexStageMask = 0 +options.replaceSetWithResourceType = 0 +options.disableSampleMask = 0 +options.buildResourcesDataForShaderModule = 0 +options.disableTruncCoordForGather = 1 +options.enableCombinedTexture = 0 +options.vertex64BitsAttribSingleLoc = 0 +options.enableFragColor = 0 +options.disableBaseVertex = 0 +options.enablePrimGeneratedQuery = 0 +options.disablePerCompFetch = 0 +groups[0].type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR +groups[0].generalShader = 0 
+groups[0].closestHitShader = -1 +groups[0].anyHitShader = -1 +groups[0].intersectionShader = -1 +groups[1].type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR +groups[1].generalShader = 3 +groups[1].closestHitShader = -1 +groups[1].anyHitShader = -1 +groups[1].intersectionShader = -1 +groups[2].type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR +groups[2].generalShader = -1 +groups[2].closestHitShader = 2 +groups[2].anyHitShader = 1 +groups[2].intersectionShader = -1 +maxRecursionDepth = 1 +indirectStageMask = 4294967295 +libraryMode = 1 +mode = 1 +rtState.bvhResDescSize = 4 +rtState.bvhResDesc[0] = 0 +rtState.bvhResDesc[1] = 2197815296 +rtState.bvhResDesc[2] = 4294967295 +rtState.bvhResDesc[3] = 2172650495 +rtState.nodeStrideShift = 7 +rtState.staticPipelineFlags = 512 +rtState.triCompressMode = 3 +rtState.pipelineFlags = 8192 +rtState.threadGroupSizeX = 8 +rtState.threadGroupSizeY = 4 +rtState.threadGroupSizeZ = 1 +rtState.boxSortHeuristicMode = 5 +rtState.counterMode = 0 +rtState.counterMask = 0 +rtState.rayQueryCsSwizzle = 1 +rtState.ldsStackSize = 16 +rtState.dispatchRaysThreadGroupSize = 32 +rtState.ldsSizePerThreadGroup = 65536 +rtState.outerTileSize = 4 +rtState.dispatchDimSwizzleMode = 0 +rtState.exportConfig.indirectCallingConvention = 1 +rtState.exportConfig.indirectCalleeSavedRegs.raygen = 2 +rtState.exportConfig.indirectCalleeSavedRegs.miss = 40 +rtState.exportConfig.indirectCalleeSavedRegs.closestHit = 50 +rtState.exportConfig.indirectCalleeSavedRegs.anyHit = 75 +rtState.exportConfig.indirectCalleeSavedRegs.intersection = 75 +rtState.exportConfig.indirectCalleeSavedRegs.callable = 28 +rtState.exportConfig.indirectCalleeSavedRegs.traceRays = 28 +rtState.exportConfig.enableUniformNoReturn = 1 +rtState.exportConfig.enableTraceRayArgsInLds = 0 +rtState.exportConfig.readsDispatchRaysIndex = 0 +rtState.exportConfig.enableDynamicLaunch = 0 +rtState.exportConfig.emitRaytracingShaderDataToken = 0 +rtState.enableRayQueryCsSwizzle = 0 +rtState.enableDispatchRaysInnerSwizzle = 1 +rtState.enableDispatchRaysOuterSwizzle = 1 +rtState.forceInvalidAccelStruct = 0 +rtState.enableRayTracingCounters = 0 +rtState.enableRayTracingHwTraversalStack = 1 +rtState.enableOptimalLdsStackSizeForIndirect = 1 +rtState.enableOptimalLdsStackSizeForUnified = 1 +rtState.maxRayLength = 0 +rtState.enablePickClosestLaneResultForAbortRays = 0 +rtState.traceRayWaveDensityThreshold[8] = 1 +rtState.traceRayWaveDensityThreshold[10] = 1 +rtState.traceRayWaveDensityThreshold[11] = 1 +rtState.traceRayWaveDensityThreshold[12] = 1 +rtState.gpurtFeatureFlags = 0 +rtState.gpurtShaderLibrary = Shader_0xE4BF4BB5EC6FAB41.spv +rtState.gpurtFuncTable.pFunc[0] = TraceRay2_0 +rtState.gpurtFuncTable.pFunc[1] = TraceRayInline2_0 +rtState.gpurtFuncTable.pFunc[2] = TraceRayUsingHitToken2_0 +rtState.gpurtFuncTable.pFunc[3] = RayQueryProceed2_0 +rtState.gpurtFuncTable.pFunc[4] = GetInstanceIndex +rtState.gpurtFuncTable.pFunc[5] = GetInstanceID +rtState.gpurtFuncTable.pFunc[6] = GetObjectToWorldTransform +rtState.gpurtFuncTable.pFunc[7] = GetWorldToObjectTransform +rtState.gpurtFuncTable.pFunc[8] = GetRayQuery64BitInstanceNodePtr +rtState.gpurtFuncTable.pFunc[9] = TraceLongRayAMD2_0 +rtState.gpurtFuncTable.pFunc[10] = LongRayQueryProceedAMD2_0 +rtState.gpurtFuncTable.pFunc[11] = FetchTrianglePositionFromNodePointer +rtState.gpurtFuncTable.pFunc[12] = FetchTrianglePositionFromRayQuery +rtState.rtIpVersion = 2.0 +rtState.gpurtOverride = 0 +rtState.rtIpOverride = 0 +payloadSizeMaxInLib = 0 +attributeSizeMaxInLib = 0 
+hasPipelineLibrary = 0 +pipelineLibStageMask = 0 + +; CHECK-LABEL: ; ModuleID = 'lgcPipeline' +; CHECK-NEXT: source_filename = "main" +; CHECK: define dllexport void @lgc.shader.CS.main() !lgc.shaderstage !{{[0-9]+}} !lgc.rt.shaderstage !{{[0-9]+}} { +; CHECK: call void (...) @lgc.cps.jump.cloned._cs_(i32 %{{[0-9]+}}, i32 -1, {} poison, i32 poison, i32 %{{[0-9]+}}, { <3 x i32>, i32 } %{{[0-9]+}}) diff --git a/llpc/test/shaderdb/ray_tracing/Shader_0xE4BF4BB5EC6FAB41.spv b/llpc/test/shaderdb/ray_tracing/Shader_0xE4BF4BB5EC6FAB41.spv new file mode 100644 index 0000000000000000000000000000000000000000..7851b171db12f2b1c2ed34c056eec12d5a6b623e GIT binary patch literal 218568
zw^!lyca%2__W$L|O%CGmaeb_DP~zs-xR}#wZ=>LTdmBfiYtP-9gSc4s z!A*n3s&(kvx>-1#|Kurg@8MpEZS&xfNB(7h6^osp{g+K#bG1b<&3#Jcv1K^D=5wpy z>PI`DbiP$@>)`g>+9sS<>uK9yFYAeIyBsv*2FuAFvVE|es;{0A?+{Mc2YWtleOvoG z2KzrJ>FllnG}!*{+^%zJr*Qx0As;qAt|@fyv&7puxc+a&se^HSWZNZ}rrtCuad_I4 zmp1L}8m=8aY<&Kdl3uZP3)VKqQ>g6IXqH)_2X5vU2=Ka68}s45tlBTjjG?&u8y& zn&%w#K7OBYb=mAY`=<1=PT2O#L9*T;->x9nk z+De0GOpP5tAYn>b%Ta`l zt+&&Htv5RR0x4ZXNJp<1=4My0)yTGvLm)P|BZqY~J^u8D8sP;b6S3 zmuCgn{A?HB_Ii1CZ0y=wBqgrD?nTZCu6lBCy*xJ>o&TaK@me?M1-o9#O%CGmv2JQS zm3Ymk5??;0@m5yeLxQXQ?Rt5i9~-+o7E6iOd|wb;W3hNJ?tPZ~!3%>cX04HZ68oa) zY~n4E5_c{2cQY=AGmqL4kG*|Q=Kc5f!{(p7E{Uz)lU>^2%Nl%ngRf}tl?}eC!B=2H8@9XjVd;EbOf3U|NYWQ-Qw}%`4NbsW#cR%@9!{z&Uk3Z4lPxkmz zJ^pmVwf{`RwfAhpwf9_)Ki}goG+g}`d;Fz_i}!MmztV8|{Hx*oU+wYN8g9N`Z@Bi~ zXt;Q9He9^7di?Dkf2YUa?eX^-?wP^+4Hy4|9{;e%KkD(18!rD(8ZO?a4QKy%!`VM; zxbgVB;p|`Z_#-@lhIXenxFL z|IvDU^d29h$H(mPv3h*$9v`RS@*lV1+8?jub$uE?+%>?p#$Jh+ZGvFe1UCC7&gQ#w z69(J=+3cZs*(M5hJz%q+;${0yu^|koq;rh2? zuJNARPZ6#TKQ{L>ep5eXuxo+qzT9xW6=$ko{Vo2-aD6O3b+~?2JAVpShaX$DGfl8| zw(Hu#`Bt21gI#m^Td&h~{@Zu{IG^&LKG+cdw;ASI3L)x?a}D!x-XnDy893| z*EPIsGX=XZVRMbc>wa|R;8}90!^X#Tk`B8Dx_^dU|KN)I=lEE|b)hpJffgwxvi^M;?>i$7m9 zzHAkL{$OpeRs02l-IrDT1;c6QHTf?T?r%o4`7a!eFI&Z5Bv>14#?$=+Og;jy(4M&J^Yh+)>Daxzm*Trjj%9PWM}#Zy<$^13_cOTnP4? zLA9}Bu$MOIE9F=@hc@79W0l})!#xsS{r^jF_22yxUba<(&4Dp>?}W3Ni`9Z_F5E}q zRqwCC&I|S2Q{iP>J=lFSoBJ!A&Az@yu={8>_gc9Bo9>a=)(l=Nhy3L!AMJ`S4uAEW z_k6vVGCsz_m}tW~Sv$DK!u=c0rcdhxSD)O=;g$Eg!MAj6xUa*@wqEe9oy|QSUbgjv zYb@OF;bq$(xW>Z0A3oQBk$>**hQS-{pq_kXTNm)-8k6z z)P9Nku1D*yUioj*_*c%GMx)m=gw29!^_`K;gT1Utwk>jOnL{pc#n~#@x~qL->u{QV zuAYZ%6Hc$^6Wa!RiOsfM4w~3-<-L8dezt#4Wrt|=YH!D2+98Q&EZu9V6#?UUd%&ekgV6>VA`wRyiLWtdH&QJ`agT z=l^YNIQ(6(bzF51?dopd^+T?QMWgfoKK{7hJG&MP3?7ujxETv$pf7ULj`-?SzYdRY z&AInEc(r{*u)Nzj9~_OYZSR9{_dtFRdSviXImBV(Hb zndQmR=;92M60deo3Fg0j)9$I!==_IGiEDRod`=5KJ%>1K_SD*&l(@aoUS&Vj&Y~H& zGlI8F{dO+Sj7Hba@G0@~KP#Ajd!C#fjn03Blz830o)i3+v?C51pW#x{Yy8g*rm>Bb z60fm7FZk+iZblBqE0-a`a%uP3^P|zl8YLxe?=apM1Yej#8)C8Z8!;uDHtiD^1>c-D zi(edmefWMEGxkfu`OwuLH6;xmEoI%0kKW)h8a!r$$7=A{4IZb%b^cu%?)=l1HpCWF zEHTtoN6z|sS+IV|Y21`}jp^mVHKyYQ{YG`-ST}JWKEmjecW??ejMU zThlkE^xV@k(p!St=bpDVT%Fq*F81vW*X|t+=YMCz`QO!XW2-ISwXU(fJ6wM5kMBw8 z^?kCT-#$1e2fcT`8*a}`4}U-@_9VC@-bd`I&3}^8%bKTun!}p^cS>vivy{dMulk<{n+x-AtnjjZ z5o~VQj2m9I{{)*WHe-mF?aN?u$7Ve7vV9e7F4>GN&gMPX*TLqN%{b$1&arQT%{7}b z$7_H7Hu$?7)|WoWRZiO0hFI!Y$HvGQ7>mm3``{V_^M{x1hu|6mbBUMj$KV2iTh7KMkhcVCxxynh~+7L^f zb_}eKmK&S)Gd=hG)8-!7YqFHiW6z?8O*^plK3uTz96n{8b0Y+|=iG=Lul|n|uAaGM z!|T3#E9Ufss4=_tbb#rtp1H1-1cvrh8v%88*Yro>+$g$?wp>W;ru6T_>^gAq8|TEk5Ali z?f7?QMZT$5OulboQ*!$FU~%Z$FfTMYyq@|q2hW`OrL&u7TIDcHaGS%Q z!{yMvADT6sUOCJbOsgDb4;F{64Qqi`Im{6(2Rgg8MXMa<3~qCnD_joSCI1@dxx?v| z!#u&X%3Bx{%DoMe8F;{vs=$J@r~2`!Nz8V+*6sm1%hb~uPphTp}+j?0H1v< z7@d!~^O?s&!PdCB6CdYO@fQx(?~1=jFwNoBkH2ViJ{5nlVDanO8qTNUFCMH<6@Q6f zn&a2vFBzRr#a}8|d}A(uoR9eSu%$b@{SB`DZU6W$-R*DRb^DwBWtsTY{U0qhG5A)2%u6Rtu(y3AZsl$MI@o{xv>guAZ`Dt`SW4oKPEU2H&3YjvW3r z1Z}No{Mf4PwS(!}hTFCuN!!-uI?-#)*KP264PHN3P8*~Y%X`^2)^lAy+E^Pl^*3tp z#$BvUQr5g~8f;!`t~Lu-zxd|CV$^feErMxhrB3~Aj4i{}W2<^w1#8coYXf&~o0qMF zx5+`TxZ4Jc!=|5j_4Bp#@r|yZ+eNQ6uziQE-yMRD(~c?iZKsqw=V;gLE)920+_m9i z?ACDh-FtkGhV$RE;r##B<9}~B|9>=`|35o!jO6xMHxJg-Ug7%3CQrP^cJE;QWs@^r z=hHsH$0fEpKij%Bx^Fn!c_#s)emEdS3eF3)(>^qD(6Fk+1h>_7EZsR>xX%uRX+v> z>j%C1F({Z`{Wv^WKRib^7C4{k#}UER5A%vwKL!U^Kg=;+{WvmMKh$BXejF9d*7oD* zaQf|C4%P>)`f*IKe$cBQ#|G1@AIAmjhwo782hK;X&ZFZyyK@X~&#{kw+CkH2`|DAKEG~60Lwc*Zz(;CixdXJycaQ(5oL;2h*z`*97ZFonttk>c_Rg)erNES3j-`u6~$fy!vr{uzsk+R{gjk 
zn62%{jp6j$yZNv_Xw{FKg7t%5{kS=pUj4WwSU>6YX8Mxdgb_1Fztxc6&J6#uSc`~yk3r8_d2fxi~FyX=G)%(YH&OMuQib?_Pb+wCE-FJiCZ`0M42Mv~kea&Y= z`u<+{`#Hqt$8L=1HAdFePQNntAH>HPeVDSw{-a=ejs3^L=cImn|N2QZer)CUX)yil z_-Y?-+u!w9+W&WawEtPkYX9?Kdd2-B_}pIm|B1$rt^B?WrdRuT+y0(i`|UpRReWk4 ze%;`28vJdr9KTChIes6kZt)+2)i3^Iuo(Myd&f_~eCYOC^YPzcaoDQffU)x|vGExy zoFCno;?*agksaRk>8O;o?uU+#e1}PC-TUlz*kG=89t_vx!*|^GwziJn2;sY=p8Z^| zBeLhXIJplFA1RpjMDW*HlOqRv`CbXzC^<&WArDypoG+sV%g;KN18&Xh%jm&lUB^>5oc&J?x1UebaG$GA z+i?EVHJtzS4QHRB;p{VZyyj)5aCsU(x!|xo4o_d*ZyBqZ!)M9ChmDWE&@0xTgT<;j zoi&(G&EIUnw0*OlolCO^t3wx0Uum#@9u*&0A8Q@Y5o}DA{!fiLQ_hvc%ei}G%DKbo z{NUFU!~N$x!IkU0!P*nwnBn?rZDn~E`sR0HD`xF27QR)uJ;IpT3*O21Rn6hz zv9a@Ka}=&vd@A21g2k`z;w>3WQ%}B21$)VtZRs2|`NF?8?#o24akm!n8uw*`jXS-@ zeYxOjgMIlNE9B4y+#h%TY<#L8D+Y^Ke?MlWU|Nm)%E8qJ+bTI|;=r|L{t_(rio0sC zxc*kcYAOGkqaFJl5|ob|+0?JPs|TxF*U~kDY3*FD8Q#V-r()6hx8vI%hss}R-PE`! zYptvm+{Ul#2HhOD>&?94&Nbtr#Or*t-u2D-vv#oaxBgD@I>9t+X5Ext)(m~U95gZE z%5DANHn;6lM<40NNZVCU*?wMWKlZotyhh1pJ4fpIy8#=-54JCE7_7~WQr6zQad7R; zd^YLvO?!N^j@R{f^Kfm*wdQ$?aJt;Z$6>LbiXSZQrA?hJ!{x`O4czxOoWolMdp{|b z80zw=9JUUw9Ms3vX~#z#|6izAUHi6)PkrZo+u-eTxX!_Kom20S)XQb)fAzNS{H&MO z&;MEPwL|R2eCO~TgN^x4DJ$QdgXz|GjoB{Y^zzxY^RYh5XSZndx<2n7?0j~QU++!# zh{lht-rwyREIwPs|68ye+Rw879!{_L{|FYljsMSR{Mah~UcuU6Gk$oDi*@O5CVA~0 z-MIMMOZ#-#n%g(n810wR8sER;O4qvsQXZJYi~XR^uEg2YS31Xzil4gb(UmoZ2M24f z#^I1)x$Kxa)(+06zjiL}t${=1qp$56un)|V-z$0@7Q4Rs9reIqc??QffA8t=V7l>d z=irEFY_)F>4yN55KkeZ)w$|oCU3-t_eW&)E^GC);<#$53&u;khW7iLQ^~1O=+V$ha=rwL9b=Vp?Iar@g zNm+e5HMs55X+3^=$7}A-2p31r>f&`BJ~Mc^8JP2PCr?Vfq{F7KL!`A+F!TNB0O6$b^{|&+II=QjOZ)!OIn|u70 zhHLNEhWmZ;Z4Gz6-`;TkcQjm%cQ)Mn{JR>i{kt2k{yiPHcd*|ZuK&in{aX(AMPswR z^%<{uyg%4DvFSfv>+6AFYeP)7ujBJzN-z5<`$IV%&S7j}b=D_`S?yUtX6C2owgXI!ei=YngTtrNV)`T1b`oc7s_^9w1x^n?Ay z95iuZaW9VjrC{~sYklF>=F7p=rnQE%U66XO1Uo0xv+i(vv)|eLE7;$N5>GvLe%3Qv zT?<|frnT3C*TU&FuCE98$CbvfeBS7M><{fV@6G6Jb>Hz;FzuZ9)xPs~IKAfoonSBH z#`bOwnsI~W?%aAWSnlS#`tyD`y}keXAe>(7?ZaSNwf9l5m-g5`&Oy^2T-QZu~pg--w~JJNId@^WJAKb*_CD?r+5KVdGF$VwX>KL!K&iY-QFJo87e)d&^ zzwYXP^Z%&7eOLe6u8#fnyQcp4UHu<^rM`R59lQEJc6IEZKQ;CL+u#A?{L+`9Qugov z4(Q_4dxxRpV~qO!J-+j6KPwrgi#zQ9qt5B6Y-(bXR_WjjvlzSz|rtMfNsV>flj zY4Etg@)$2=KaRg6WUYo>t-PMorh`SaTDVlI=we101r{Xa16 z)cEPc_nqJGy13f@eS;_Iu(A9@qfgplc}y0pjmcBCdAR>`kLYz`xAyo?(bW}m%C7EI zDQmv}7)-DIaOz;1^`{Sb^+6l%7rojx{uG~DQ_}>?Z`zdA#&p5-YGe9f+MvWKpBZ{S zGltXbQ}y@UW(rrAO>TJQ=DOpW<27@1`RM;F9oGLpci6qstike}EoI&J&mP>~&&|=} zb2i-E&DC)6=Wh7=Sqt+tod3KH=RaSM&)?$<^!S23zEF=Z+;RK2zL-N;KTZg)v0fzH zSk!yLMT2R-+`N(47#;CpLUOb#$_vcFl8{c2MKVLFBd;LEIO9g933^sWgPkP;d zE*&hMHtQPd{8}a&TjjrOu)1OxbG$z5SuWUjsp<8seEDE{eb%u;u$Xi)%p*;F?X4J0 zV>4$z^ITW`E_0=D|KBW~-5k>@x0Qq2v0WvcW^60Rzl2|s`8U4$ZauCV+>T}WuGaao z*I52FSX^<~pz=w0x#Q!!TQDK z{J`t?;2Q<+F=lKj#Z;G%y@jpz7JIk9VVP+kuyORd_HEK(=klhFzFCL;UE|F=>~A!0 z5p2)dGG)7Wi|_9uioaFoZ@<{O!{Tq#=-YN!{Oy9p+&*PL=6qeu9h$m3c38}vI&7cZ zIar)sQnqp2pWV^Lse6=N<715a_b3m=&#S%P*sW_rd%JgS?2)qe+&zQsx#kBJv*zw^ z;p*3V{d+LYxm5Sl{|Kk6xBO7QJO}w__@&`&UnlNfDZSidv+bQ@pB(xNS9|*g%dxIg z`vucZPF->K5B3s=?SLEy<`4(2K4{y0Fn=~aWpmAIZG3P!@|9QRb5L-b&%xoe6B9!| zhXi}chwacDhvg6lt~OmmwZWf_PuYxHYva@ABd^M5V6Zv*dG1rT=f_Z^$RyYF=*|z% zs1NV3xj!OU+k;b9yd#6@HNQs%%jc{d_1);Bqw!-azhi>w=f$t~yJN$(=enV9IKOJ^ zxZt*}z^1nQozt0jY?<>OT{I!SI`{FBu{kB3JHa>EoyEf_DRl!&1 zpzAwceYdaszs|j`iC%rbw!?l$b6tb4Z}1HbzA@NX-IUVU_}=)o<6JT)>f9fl zO&$HhtIh+#Rj2N4)Oj#En>zZAn|FPGDA+iczu%re98NEvM}p{dii`3Y}{I(Ps8cXOLhJoYz^1ktsSk_x_JEg$blbw#s4h4pTp!a`-aXdaWGRX>t&cKOZ^pW3TvMh4*v#IvTxvz6mzoZ4Tdt(<_JXg00QUVcjMN z@%ZzR13&hP|9!YQZ{zo+-w$DfZJ__0^~;iC7)VfbkD@);qxKNllLqgM_i z1zYcx!v;+b;_>Gr2Y&1of8^-aLtU?|@lm4D%V*SJbJ6;Y7L8sxj2_&c7aKM?h{vCg 
z9Qd(U{4t{Ua~Lxky?n+BHWzIUV@IP`4&wy3=fy@%4&w3WBL{x$6@T35{T#-NMlYZ7 zgUv;o!vxXjmBWO=?Rl|rlY@Bt`N)AEd&Qq9y7kb;|4lS{`Ai(#pNro{qgM{U3vSPg zO`06UOaxtJ;%y>j?taC=^C*5n`_e?D^H$6j@(j&7Y6|5La%Q+%3m^H+bbcG_@t z__2w3L+tdbGhMLpEtN1?QIcfA3S~?(ungeBOp@cfN*e zd;T6@pvM<%xc)5EabxE_$im_JYmDTEv+W$)BEkC4CQlreWBaUS(QsqKCSSbfX0c#n z#U^*0&EExBJlNQ==?7l6C4!A5n?B)XTQb<#vgscV>)W8j^>>|LPq|e1(mC`?{_>We zJhUgS`s#WA#lPNv4Nm=Ky0tNXo=sUB#&4PUnA2rb)_cw6g3YP?c22o`I6pbD;okF! zxkB)YIq2HNt4;6uR_@wdDY`a23t73t+Fzx?f9dd)X?)dSd99YR^7?CVtu;QY_xKt; zzGlOH$9=7aYkTb;U#H>x*KIie^?H2$hV$Q`;ruu3@r^oO--p~de3Kl;Ro`&?n!1|? z8>hOz-#KHwSvb9(_iY|bvv$8uj4gt_JhNciGRIaqj3ZpJw+=SGb>Fpf>TDBEuh`oL z)7sBUwhNbMJ$v6im>*pZ6@Q0tb0faK;e5-!V`Hy1wo^2EwZC&Pt@^f0u$R8E?V5w8 zZ*axlEm+^`eb&yYvwJwbV($@5Yx}lmc-yzXh4Z7gefxX3zKO4IINxgjAHmvh=k1^2 z^lE>vU|QS$-r;Tg`-JnOx9#s6u6^;fkMlK-uSeT2So>_w2fS?i2dm5G+`xTL#d&%_ z@PRq3IrZ2*3#&0!Rv!-v)<)f*?3{KF4yTLloW|9+Ce4v`SoIGH?$d+M;M zZ=ax7dq)LpuU!L2htv7n7w~HDnBZ#99)VYT#|CRp9X9ptGj#Rs!N&#Ds{P}Gwcnlt zCxp}M+&wW^J-T%$u6d(Z+>?U!t>T^>-0#;Z(dgBeQ-j5+c1{ZxhaX$DL$7vD4>o7j z&KbelY3J+AaC)_KR&d+S+2P{wW2<)P75AKA>$}b!-1W6xr`BtGPi1^p{gvzHx$(2E z&r9k0Wd9!$T-PUi`1w74LC59B*K_5%7F-x!e-BhcIg%SvS58-lOJB!_RE8>$U(2Ux-wYq_5IkZf`8V!`+=*&)uX!~ z!0Ucs-Hg%TZY-~fUiTo^cG&&ob-~8<`jp0Xoz%G@xE5t>f;u<#>Czaje7G$boJg+-KeqEZ6!R{mx+8!QD8^iB_N6-WBY3mvr&;fmZSE z4sPS!6Ha@ki>D7X@qFLy-rzCD{-tl?=`*e7^}b-^T5INhIEVFKYvzG)dV76&FkGzm z+}KN5{z=l!F>;;YM6bsq~hmg>qI=UaI{9&Bvb%nM$9ej@nE9CUW` zNUPXS1=HHRo(`v%&ojZ=uX_`mPuZUh7NhO|bK&&zc|KT-+DC9c`e6OM(AlkjxYoaO z@`P^vzZkvN|4R*ixxudlI~V?yvi6u)gX>(d=e*YAulM*H9k20yGhBPMF5U{Jwd>;T zaJqG2oV7`-81Dqjvtqm(Ogp?;TknO_D~3F2732NjHpU0xv~!vmABNK_hQ86nFqR($ zi(CBTV7V6mBv_quy1nVsV4A&fNT>ZfTs^k>Zq;YOVzcSLF{D-Qp9h

i-wPv~~>t z6Hb@Ad-E@Yz1+XDeU;yU+uraZ|;acDBBk%6kcl-OepW;(%{lCH59x&ejy{{W8 zxYo4&c<3G3IC8lO>ujnAkl>wWKN!R`2r-s5BR_?R89 z+{OymMms)ZhtuUMJ`Rh0R>lMt*P5;|9Vc9VY}&wUe8vqnK4OWXE}zO_yx__~eO#S( ze0EM-A7*^AI{x=N!13c_otd**XZ-wKPOtVmz!SvR*iD$Sa-JxdUgzp>f@!y;uDE!` zwWs{h#ciLFOdKC^f19%Q;ok+fbMpHhpQPjZ%Gdc-_j-Q_uX{c9@XB}6U|QunSupL2 z)UR`V@^HE_se8RC!s+e3o_6UKd&*#ak(2!Jy4Ra3_>Vd0{Efxb!D7_C-k*YLwRcVv ztRCH1;x(4$XzEe(PhRccMVvN1#&Wup#?t-S^ufkbyY8)K2(Kw>i4p+VwL_eB}1$lr?6v2GeUkW(%gBnRdm+E3PqHyo)<~ zbm#Q?Shd}E*{&E?wY=Au-w@6!x+*k$K`_Cek>nOYxlYp!rOkV z7*1=?!l8mw={SBG=d z*sKxEhi{r^_C_dWSIAyi9Nicnz#HqR8G@M@j z+$`8{&ueXN9?plZZM@nxW_xvQkCe|}^wa*eMSSY|y=AaGwn|y&^wz<3PV?WU<8rd! zZ5u8|<+xq2IObeky!N&2gLlY5ukUp47)-Cff3j1sm~=6;N2?e+2iqIz`iNH_-J|T& z^>I+wM{~JLe5#MT2J7Q)DJ$pQgXuN)djzXf?~!mm{pw_--%{2X z{yn(H&_4Q)j@LZ?GkmWc)&AbW;bkDuS;7c^YE7dBkm7xnnXJ$^~U_3hG*n|piNW#Rg6Y~_a6 zy0|>pSg^?xFWVKt#)eJKc-gKDHdbu%$JusCdshV;J2rj6*?b;*b+EBy(=WVi*903| zHhsj)c5Se+X479B)>rSf>+{&_!k5k-pr869Z~4hXd*Z6Eo^`{&){S*|*XX~z*SS7= z?X5R7_{I)TnF+or*xcWoQmlcgU$Kn4--mnM($(=hhFd#qo^I>v-=4D8&K<$++PSmG z@9Ocp8*c60)8qFxT$}gx`29WpK*#M>i{@VU!SF59haGd63uE+9G`jJZA71_s2lL;l z@qZ*5oxl9?`VRA>!M?*R4jUh1K-Zped@Pt&=l-j7H~go^aS4{j%mq{CRv{3jZ==s6BD{^5JVeuvdOB2h)rRTgt~A|5H6R_|9w4pxU=bw&vuHg)Q6IE@<4k8Tddr&XJy1shlU%;>>h z{;w_i7&&s<4Hz?Jx-(#`l*Sxa-x?S@*qn-O{PBuCPO$da%mrSyaf8h{n|Z<6{C`p7 z1zQ7b<_NEP;|E(4Y~~AR+a>iT2(~WR%pG3;|L}yt6XlSjT(l{+7<{c?KKiGhV(Q;- zf~$Y#AFo^{4zB)L3wYUn8(jUfE^s#e`(1GL&l%AxF7rQ*1H#>K`BdGq3ogiHV!{;_dmnQFs1M7JHq)lXuwqo}$Cn_LL2ts=ENeH*=5A(&K;bc+JPG;qrEV z%$Cy2`9YsOhx22Olxb?foGJAWSKs=XE7+J>llqFYSqF0m8$&kz#;e_Vf{iJgF~G|< zZ?G|DGah)^<_k9FY{mxnf4tj|=MP>Whdi_=uKN7UKfC<(Lp=E}7+m=qTbxb)3k6sH z#u=|X77nibjX7SnMS?4T%xBc&%@L zufT7Qy%vw%`nEonXz-E^UaG-M2aCH*%DOHu8(iyHoaK6a`5s@P$5-s}l^SkcuiS9) zSLyM;G~C~=TD9TsS8$$HxFm8XNFq@)9O9Kmcd@$Bd~3igJzw=#zCB|gT<+F*(O-u>$%>x!L<6!W4mB= z=<>2|Xzl@DPrU7e{k?xWyLCpZ_I3zv+uJd`ZEvS=TD7-xusZb8b_u56koL4;ZPF@- zU4!L7Um<6q@2cz;Om}!?N&j5eI%kug{C5wQbJg1;nC9?O&m22P_Kap;wuvSWoPWjo zTW}le@8L9u>x64W#rj7yvAi!63+FGE{Qns&*Ye*hn14On#QFHVyyjV{zW1@Z4 z#^s0x4{q?04L+*DM>qJG1|J)&-^ZnF-{<+8B}aARcYNn>%uZ$-(WI zozigiPwjY(;c4Mw*0`M>EUs}=7q9pDX9W8jE%Z9?&kUy5Id@jDm~=7ZL#y`A4yLim z1s7j`&k6Q7TIl8kZ|CI1ZcfgPZcg;)ybha_A;HG-{FLoj4$eHD-qpRJ^LJie*wnqK zt9x-uZTk$o+IC)D*v-Xw-LrpfU((dSw858ku`W+p^KwP7d8u)@GF<%{_p5@{D}Hsb zn04K`CYTSs#{Sx1dX4>c!Q#@5uQqAMS0Anq_B}khKH}9!V|3-OjL{A8sdM+nVB>pJ zSHB%2_5I)8UZ)KAOP&2w-rUr`rNhppTRZIke7!AL+}l&uebF7kwT}7U*>H1xSC8M_ z(>URF6O1 zyF2TbsP z=R5zQ8m>P>H=KQ#hU?F;4QC&&$A|Cn5qf;Y9v`XU`ZIFF*AE}1;o2Xy;o2Xq;q0R~ zT>UW`ZvBqg<6|{k`(rm;{c#$uy>WYdyoU21zv28RXt?p6u;KEZsNwv7({T2Qd;GT@ zcm49r>38Awc-J9&H(vJUUA+3&?@e7!Tq?)N26EV6@qE^HE~xAr&nC}QM8J?QgA=+%F*Z*ca>n;{Y~7z zgwrdo`!QO@T{XBLceQBriu>1KTKj#u)x+r(*L@wW;;s?gkGp0xdc|EUn09oN@7m$? 
zitGN6R&mz}?#Ep>8olDK7fc({#9cp}UUA)L(yU+i^cw_gtN4cDd}_bjD4Y-7dU9V% zEB}pyjX`}ccav}$U-ztd`ED9)Jr>_A+`LpGr|lKoeur`Ia6WWx z?UT|=TWtI0plJ)Pxcdd$6OFs~DtNWAf3WY1*4}hLIIa3~U~sj;c2EwQHsI>d!NL0T zw>Ve3heV@SySVwT`pT+*Xt4V2v)#j@(JS7-VENbB4+`f)=i?f4c(7|j@gu@rXR3dL z!}-vMSUhvU5X!;CS>|=t()<13H)t_U7^=H?_sr-+NMlYY^ zJ0E=(1Lxy>a-VxbXa9Tbo`;I#`%foEXDh#xIzQ*#ZaKuF@hhK`J0Gz;->g`tL}x3% zQ#(JgcJE@*_?f#J2W5@JX~FuhUGIJI>f`Cb{dqkj8lAuQ#(3Qaof+)AxZe*eZkeXXHa^l(9k2EEP*O9iav6oeyN29Z;C;mpigqtCV%xj+v8j7;i+I_vs+@b zf_krm4m*>L0taY~MbG(qlT8E9%>#@HWEH^Pd z@55_uUkbKgvb~=YufI3)a_}oT^g%s#=Z|)^p{?r6zdFCRFRwh>Ven!(zn{j=ItCl(R3s@~Neub;qcGl_N;OgGTjp2J=wZ!-x zTx0wJR~w3d_IrQA)f2;a#cJu_U*Pt+{Tod^x%~sSZ?%6NQ}gZ#U7PuMt#v{(e*RX% zXmBexbrgOIyqb)+tp?a?dq;I$=y<({#z#I6kzM5FH^$RjOmzk z2K#XwwN1sL=9q{RXKL`<#4!hdZXnm@v&b}H@2A@Q8G_t>u768nTCjgJ#X8#b@#hfg znVZvr)v{lA?o5xSuD?HrkSCWJz-rq38H7Bx8NpsF+WdKhJnhT`_8PD6kh`F%>;ES< zxof=hd}gqJ@4_~Wr9J0n7Hrxx=dJJ0N7R$ctYEc_{cLcxI*(?DKZ}obY;z9yb4@$) zIpNmRPn+%da}@Q|nG39zzY{SxT+N@wdXDD-`|%uWo0mh)b1Y82^MO6buD`zDpC9gc zxfkidVcTl@WlR@neEMKdoCVR;v#u-zR`dTRN$iE;iQTJ-t){PWykB$$*ZaXDrH=O~ z^HI|;`78={ovU+cF*Nnu!xjgtrOp!I_S(24ntJLi1y;+wY-zA@)a}#njMp;Y^vQQA z#&;jMEV{Ogq1^gue>t%2I}RDc<P^n8 z!v9y!tD$SlILob{{8k6MMrN&F1Fq)vkaN5yJh2aK&X=0LiPH^SuaULjYFVGw2K#aD zYFme6T@L52IBl&5PFs$1;;)acEq>h_zyA2-e5mOcpA8xx=Y5?=8=`BA-$sp}vGj93 zs_7S>jT;|h)$_Xvy0-Xj+V~kO*MpjV`Z$j^YkZ8AJ(}0a=IGkuw?*S;teihJ{q%9J zZrS)4tIpM}(6wa@jlVTqJ$1JMJJ!apb+<*=mT{F^H}$py=N#2~+oNl%ecA!8p1M1N z|5uwkp=(P&<<_;HYuL`X)`sEOq>((dTm z;%=}_ z`|r;>tt%e@j=g7NulK}WXzE$NdxO;mQZM(iec|f4PWJ=*@jBJEKZlyvskpvdP?vjc zYxjCiJ_mr!_ibXCuiQ3bKM?L-A@kF{!a->2j+y7Y4>;%C_g2PH(=T!Qf{l|oXPkr4 z)D!0raN_uG%Q$NKiT#g$!Ny# z+O&>bKgT<5TFTRZOS#Y3=Hzp=*VU=S7=X>6M@qZU{l~>$o_+lwu={%3FqZc0>o3Qq-TCXf zWBswPt0$K$z-n0wuY{}Vua9%*DtO)34~Ex${nc>m>8H(h#v`VBVq61O%f9|vxLWr0 z*Ma>wceGv4q2}BXXMAn|dmieVIRs5TYu$}t$I$yx)=V}166Yqcaq60RGn#th+yYLV zteI;1W&CahJLgj8wo=C&9Sb%6()R6*Pao`wa|fDw=Kq~wwfdYf6rOytuTs-Daqa?p zKC_114OjDgW?tL_S09Sz+(=&cq8UH;E4jY*q23>IUMyX2yzV_uUhC%Jef&OfuD1!u zNA6nU??Dd(`+Lyp#vY$iEqUAzHjlb?KY*s5aelDz@f>Hrs-|CZcnF*vCZY{_*4&4| z{!J`(bC{4)EjbJan^(QwA3;-(&!b@T>_ZO!juBiR+qa*OfsLVUVoJIByZ?F|Y~6m; z&0KcR^aQ##>r6r&dFnh_>Lf4gJcX{!I+IdIo;puAbsVRIIHEsO^gif*m(1@4Jd5uC z4Q9T!Z#-k@Z~V#Nr~Mme)b}TzL${ChI(i;WJ>&EO*uEZ4z0A`W(bO~FUjkcCJ!{d+ zV0GuN_m@||eq1B8y~?5H+!JTqUjt{{Cnq;~a(NwWE_Ho<15MrjO@Y5WdAeGBa07_$xQY0nsG^V&#$md12FeH-jJ=Jz<>fvefacPahYNA>qOMsV0iar!tC zoIZ9YmfX4Rwf8>Q|JzR8eB4i~Weh$5I|lXo`VdV$J|8tc2NKU1a(!a|xUtutlYWAx z9-mJepRB=heX{@i3|xPY;d8iJ&i5B!KaPdAFFC&Aa4f_*M_+@ze$(zZaQo-)HJH0x zpZI^<^8c>!KdAAS>*M(|2ivpV)c+pr?`EXEAKN=34v_Z2k6H z@DsYW`2F1YkUvoYG0awfU{S)lR^Q-MI4mH~mXMFz#XM7X;A2jvE?wE%CGyGiQ#itXR z?K@6Wk(WIC+0nqGbEs!OI|kf1+PoIWl=Cq^ZDVn$nV;CW-WTmpavdApT;E31E*}VX zjC#_Rb&VlUUoFkqwP74=?$g7^h5Nf*c^(=Mu8(^9IX>7tG8PlS)yyk>n-FeYZT4*< zN^#d`CH2OX}J9|7uTLi@b$Spxl9UI%l9!S1N*UEZIg4T*{(Qo zrT`nqF`S0@a_6qk#8ZMDC<$=*^;KgTv>Vwv+i6S!VaUEpfD zo@NI7acs5C!lCBaikAAvMs(`i!a~eE428EExuBVuiWCRwD_upoBwJpzIx&I zca6gJU$e!#wfI_vd%o5#-2SgqxbfC4-1gQhT>JWkTfckZ+BazN4GTBkMuqFYapA_> zq;Tz<7Ct)@X|uwOw|U|EZ_(mgw)j?sJ04pX?z6@=Exv7wZ&$eOZC|+Yc4+Y(TYRU& zt-o{Oe)ndV!nN;Oxc%L&aP7Mn?)dFdxb{6;e6JSYyTyAHZa(`IZoHm_n@_L8wfAoE zeOr9L7T>?c4=CLJ99X#V4=Q|n?0pJ1pT33be{kX24=LRG{R;Q|AKK!F6>hx43%7p% z!p--H!u3D0aOdw)g=;^$aL4T^l^jlkx9gpbrk;9dfYk<2FZ;JM;l|UJ zz1dm%aA-rpCYlqZ)#VDr_sD5X5V zZ+tn}?_F6(n?CxgyPrw>mf9Ah^qd>}9m*@<=Avy0O1XOx_peuiS7aZeu6=Pzwe;gE zuyyMmaWGt6|D`D9@xL0Zf87gS16R*ny%u~AZ4crYP9E-MuS2_=QrqL?t#&mAH4p)f0C)uv+2{1-Ikgg{GdkcZ1dHcct!u zt0(UAV70`(7u=3}ADVjN4g;&*T=KmiuAaCnfYlQB0dPC+gJ|lB`w&>|j1u=@xO(EQ 
z2v$qn;ox@MN6^$0_ffFg;1c&SxO(EQ1XlC9@!9louzkpTt0%x}p5v7%-;a0~O9f6fO|b7@GFBttwvqVeJ`%2;+}{VcbN>L%anTmP55d;eW?S7T)f^M^{s_EaV_Tb2 zp6APt!85U@t6O(1N;U5lpHO#H1?Y90xQ%_regVmgCw)GF#k8Nq|n3i{el(r>K>`q`~*Yhjqp9bdj{~-zdd|68?#ttX>$~ys;A*ZPX=i-6xg3I) zxitZrdiaFkW$?-RG7;QZ`f0Q6b+M}_?!;i@_APNIK~qoMNx|03IhhQukGeibQ+IOk zlK6yA0bd-ReZrJ*ebfgx^R_eCSlT?dQ&IZy+-jSeL(Ow5PR`SS&G|rlljF2#>hYPb z@#%|?{g&(FTue;M_)lN_6LSVM_4v%#_|$f1LQ~J!bOBpW-9F7s>Bl~4n}tKoK8Z6Q zTzfKhv!Xk8+8k54d8eLbd}jmeqwji@^5i=^xP5NtKvUO$ef;IwYtISxyM4ydmN;{v z8^<-sdUAbZpS##IcJrXAXI$n5tK~YL5A4Tr(KbJan&Tp_YleERl?A~5u2JUIf^fBZ ztt5+J!{aCU^UxGy`|vodP}3J=h|KdY`i>Rgn&U+`VSieU5f=TAL2Y{#EbuY|5Gek(VA?kh5f)bxwbDvgh^ z>d&%PMb{R;)fzuz_2IDJYWijTR&RXr&R*{P$lP25?q1Kj?k(hE$L4hW9KVc(r9Aga z%e1p5Hv3=i&E3${4oZyUkY98c%b#$Z3rBW;^- zs5y_s>F1{4_MF}fP2F}krCqtXdo67a_Wv1C*S-m*TITW=V6|KiTf)^Yr*1vhTcJ5V zx!$&htLblCZEBw5nMd1#Gxl3xlgG9l*tumJTT;qn+aBDtv28^u zcYgo-{uS&v^`{@%OS|XY5q$>^<86&yo@-*KrcSOe+wnTw8C{!owxNzZb#^IrypB?5 zS9ERG*_Jx;)Y$FscxL@h@-X?{{zrS^gY4;-z|=(npRyz(H91Pb--TFIWQ;Yv0V71JFAR^y)vZ^u6kO+Rgke>zxw0I{;aIRma<*UB^D z_9^wwf~(bfXT#O4H@I1=&w(3HTb@791=|O0j@2H-RZCya1AE?_fBszHe7N(&HOy-BJHsgrjhYWgLg%fMd4 zb$$#&Q_nSgIan=qt^l{M`zz7ZQ|BtMTCU~6VB@GeUVabeYH<2gzmswey0(m=-1=$% zTCnXq4mlUsp{b|+>%nTNa|5`Y^AI%k)VUF?mYi<_8%I4k-wgg=Ip2b=E#oY=e%ikk z+;0CiH1(YK+rh3Ib?vwVO+RgqGFQER?*ywmr!)74!jqTJcFsLDeH|nBBX@yUVcf#+ zhC8S8JI?pO)!aYk8n_p3J#DU)_fh(Bt<*M*<9-g;N^$ae0Gxb0k2y~dqHBxaLycd5 zd~&|k^o!5KjgQw=ou|XmwZ-p|#?M&#IR(}krk_6U zbDn5?jFo$!*Wr`s+T!kSt@mhGg@iA7t7M?-ZmN7K`vvBp)eGcqc8^6|l z9$j0;Rc_tXdjXttRO`KnuC4azCAfO(z6}0fZN7r8E&Y^R*LtpzuY%KNeFyOxy0-Ye z-uT(4%x5+I^zqs4jmF0@%d>}j#y8Qm#qX`g&sceOQ`0X#Z#O>1%GzwKchI%P@7>1F zSo%4~)bw+Xd(VCkoc;D1oELf4fe~Q)pYMdp2T&*Wk&V6n4C{R~^{f*gfYk<4FZbFH z;p%y&`3UUC>r~sv9BN*tVxJZAK2u%pwXNOjIr)47Hs802WnbmC5&Nfb_ZOL;u1}w# zsXJz#^UuLK=Xsu0(=Tzp02?QB&NyG9sVB}?;KcErsQIYrCti&Dj%DqOeUcweY2PgK zT=+G(e&_2OxSD&I_*FtI>N(fHg4OI-&h>9_^_=V9!G3H{+aDZ%a@d|Y?fnIAxA!-iy5qMOxybYE{SVmh zd8upPlTt0Yc1$TT#6jl^S-Ce^I)##Wq#Ux-!mNlYSTyVIO^-z zwS8l=)VF8j+ndt-yx)yRn=2DP{U06P?*ABQ>c-z2UwNLZ#svEvGWEgDK5;BG<7)F< zk4@>vvDY>Z$G9BkEl%6xfoWMQaIB~MZs_B>U zn+sf@d*&{6%+WDd(=Tn$)A-c+KQEek=Kp+PwffvMKRo$lAEu^n@>u}v`OI_Sf^ap@ zXXeF1aP^^R&W+@?Fq-jmf0OHLAL{)i=fzT6UAJ5VJZE0_iQ5%y+`hDxJQqPzxBfxc zdiZ+u|H*wj(yC*Z30PT;qMID>tU!=UNi%_tw;Hvk#?Oa$E}B z&T(n$|LZgA-n~rYA)qoo-DX$J%{r^tFm^{`v0P+US1w&3tX& zc*f8_W3vv}e$}5LuZyOh>ux>pJ;e37Gr!NZKHA-IZSm_4wyrkwKb*K~Ip-UI)xtM~ zuMSWBjo|vI$7f^kYWU>uP;CO&Pu(_rSEQEnv?(~}sXsQk^TcamGqB$yQ@8Gtlxi7^ z&6~P){o4XfJw984J=gWSe_NrcrypB`t*4$fXB)73?m0&gLoNHrZNc?t5Zl4k^!Iw- z9_+{KUE2;EJ92ovi<93@VE0p*hwhViMpI8NyMWcs#pe3AH|4Hyb^D&W-p6)BGuQa- z4p)nx=f(PaplLHs?wfmp)s5$P8%ND^W$eAcYB|??!(BJCCij5re{YcANP`}Zz=cO7|-*Oy!Hpz zybge?C9ea)e#}eTK^%QJj3Z87eZe)agW)wVxjq@wL%?-R`@yZBF+CKnkLO-n`g<5S z^Tz(l^|gQIYF_4-b`A&EcKXAYDeW8q*T;6WrJW*>wH@25*G;D;yTT_dISw5Oeu(QGI0D&+bb$F}UJZ5U?&y1yTm zYjPl5E%Wdcupj54wo^IO?7KK^o(4{viG4bndVUA!46vHlYkbax+rGISO)1Zw<}C2& zXzGKT=clvLjHAtU?;J`$=BMpk4mI-=8`pi9{YkFpp_^;=Z1REFJ#U|)Th|!!^wrXw zU6;-W`&<})0en?>p64%w>!Y52UIaFejPu2CHShjN;{4M35G~9lfi}$%f_^ts@E|mw0SSsHXUd4l+bEy`HC9{OJ~drp2Ev+8^_=_$6QsLHrx#6A%$Lu}&lJoEi z+_BSUzvQvK3U(~D*+;q8tZT_@;I5n#>uA%*aZ#PgiU6Yo83#8Xea5n$uxc~-7p z;*A6+p667ac<+PFPd)KI02{9tW91mh^)sGp=7(VCZ}>-G=WwpOkHPw=C*~($WBN|a zm~#DM|Fp5^=LquLBmLRbXK=5FtdIWO=ySNbG4%NYY#WC*ZOHYB{Y$Vh?3cM4<14s& ze79fdZSniD@$N^^m`UD;0{9AW-G*ap4 z%3<5a(#L+7rhR<_>5hAYR==JYm48wjo-cG zkUg`Se(@Qv@#%+89pCZMwZ(6O#_wMI`fzydsp+SW>-L1jCu5-IzC1n?H9q~(;-fao zXVl*!nz->fxM@c|0GwycN#Oq9JdUgFPTKfqFC*6{_Q@K%Kg%_5dDaB$Pu|$Gr;^ua zi7DXTPxGE*N_c)Ak@rHK(e%@ncBcY+E`4WWyK?=sdmo&-vG>F7HGCoEG-&Gjd!LkR 
z^L{feIP0(XOu07K(&@nFXg%+r^5ii+*gWdb5oSPB*WdfAT*t<&ChUIP_vzb(V`dKb7~Ghr4z_2z@t*@+{pFt1>TkXH&sqF4rgNdG$7k-wr!R3FE4e=P|8TZHqfhFH z{J(p-dGO0#Zr+0DD|r5b7btka20LdKYOs5`g~9f_D`oa_i-7B1ZqXKBtZ?^Ei?{d^ zExu%nFIBkL&eAQuOyTCcY~i-IT;bMVzQtE)@f8cV{z?sZ{2kM}*IOCx*lM%i@|??6 zz>c*x$3ULF-l|~tde+gVkK?GG`MVleO`BsT&)#QsuzM->%#}63=9e{2u3zG<2~Ir6 zT%LH{!0x5g6K^fB@p6xp>z8nlCQrO|z~-l(cm>kc+v?(=f}jOY5a0oeH+z9HB-p1s~iV13jRb7QbEv)7aB7yBlSJ$pTQ)^+z% zo5HYiXTxVka)*&J*e+3U&miG2&OG3=MQyKmbPt{$JQ8lT$E)@bV4A8iA+p1Su! z+ujyz+x>}|^QxwAe70-(B!}(MwZ(6T#_!$|OHIGzxg)sUmz~hms!rwX{R^5-H(0I)Dvev zaN^`TRMRg$`xl>@^8x7E;&))A6$Gg25O`Baie?=X?*$Dv1czQH(u7A{_q7kvX?so+}_I_iKd^n#61e^xyoKnu3zj&H}<;6ItEQWZ5<0% zdy0Ib9S6S$UVp}TJet1R9Q&my)zXI(z-ro-p_IpVB3LbXodh=TTq~}{C&SgV_6`86 zS=X4_)QsUhd?45u{hD#Skn$9`y8g>j%FV%j>Z#yG8ryP|^5l0K*zvdC@|5!AdOFy4 z>KL8@SJ!_9O1XGN%DPWI6Yf6MTx`?W`s?RD^(;z1?o;(Wo8ue~_fO*Fel9q`gBNo8ur#_mu84F9*A)OnXbXWr1bL#9_m*$ zd@viZms=*^YrwXVHA0^IxYzWxjXn1a`IW>@{p%Wg)=;_EwC$On?U_gX zuW$U*p1j7pp|Phu`H0fqkj9?&xGU$4a;%{iEc0nRzN2_JL5 zxrw3QEnxNhEc;fl+8Nk#UT$lAvbNj~SF3Bw9q<9v(RL*B+Sqr3qvvObL(y!*wOYHm zt0ni5=%11MXq0!MC-=JxzNg@O!S?AsN@KlGy~KJGJn>}w!{_v79Ng;fc%q{wT$8G;Pc5@-SLoTJWeJ4^38a>f$kXCzc(A~xql1nc)U&Nc=$Ka-T`Mk z^m(^%>%Z6VmGJ7$zGDP@FWPdg@SItue$jVDqs|j1R#2 zgntM&R(`hp5m+B}|E|f$l%H_;@m{9=Qx0`wixcZJu(4O-9pa7x!HH=VHG&#~ihN%b{kD;*8U2_&7GnUmv-B)Ymfo z`3`J<@_pa$!D_B&8EdsV*2cBI^W_Jy=f=4t&RlY?_H5?T0?ZNH{SkZCn4iFP&iveP z+thYm)4uxSVBaek^B4H99QtUpUA469nBT^={O^akmO1gQyg8h?GXxu>*W zqoEs1yZ0lt(ZS}SE!r6153xPS;k#7*#zfOsn`0)=n2p4~))f51XDsxL+1L&CnjWX% zaSI-=!9IJB-(a8TCMbBqf+uRQF((FlP9~xBoXpP2m=v6IG6nm>$qIKZnY?ggOi{T0 zQxp75X^WDzeVCRN5&xc&wMcC#6J6E)MZsf7e3wG{k z^E}C8n-A<<(&jmnYjcg5AMCu;=J}J0J$Dyi&;Mz>0DM6X^EEHqw;khIPk-wA12&E}`yo$19K&7c2TN8**6w8+Ki8@F`TTZl z)8FN=+u!|JbC++h*YgSmuLyP=R-()}tPHN>uu6-s+TyFV`06dbM#D4L*Myr--D7lv ztH)=p#;0BnYon=WKfexG&AOfwZEA_Jt~y6;XFa%jo@>_!A3i1T7C4WNad)^r>c(^| z)Dm+8u;bSY-<;eotH)mqT>c;eZt0m^PV8?ZJ+L zakM4Q4q(Ti#@P|B9-p1S?fiE}Q_mRe0#>uG?Q2s@j9t|^>KN<>SI-#i4z6Rc2V5U@ zV>+kR5_3*0WA4LS&0*4#b?_XWG| z988&Y=MZpRcltFv=jc#){l4a5a5e9}@j1NZ(;u!j1fQ(4N5HMC&3pEdlzzNtYdebL zXb$re=Q=+IY#;2mxythlcP!ZNN~*{III#WH-@eG>e>_-!^*mdg05*;`*MGS=m`h@w z2sVfClfbr-7$<}EQ8$L%81_+qKK;zS!t>Paf`7UTpF>13mNQOt871MVUO#2G==qPQ#Pexp3>5 zm%j4kbsqS94)yq70FJ-09{1*+=BK`IyAmJsxQa4)3NEcWc*xy>9Bp+>5URt9yR!gK^a|&ewz0j&AO=H-Oa!;&1-)n!jst z-Rlg&u3z2j*w51%zuM0m@wJ~f{eSAb`aj0z=BCaql*##4uzJSgHt=BDtYdLInts~i zcL!Mgs-|!9^z9k6X*%-{pF7c=>pnvbZLs^!y9&O$!Tx;Wo}%B|V4tV%1KZzWl%Ci7 zDIesh*U3XI{&0&AFWj~sY4Jx3H{Zt!e`aFd2NZ4}o@ntWTl}fQeP8u-;nshq#h-2Q z=UV*v7Js2|mo5HP;hw** z3pd_3E&grc`hQor{@=Iw4~6UhW8wP$)Z#z4_%DS!KEJm3Z!P|N;l}@?#s4haeEw?j zzYDkiKP}!dwfN*MK1GX9S-9iVxp3Q?s&MU77p{Gp!kr(} z7Os7|7N5Rw-cWLpNTYQ!lpS5t~%~rVaW-r{oPc%p2zryD%+;@C) z6>j~x3)g?1!nM!a;`6om{DnJz7bx6(7cAU(3$^&dE#9@o7isZD3wM4kR=DvOZ}BBs ze90DHs>PRX@nu?k*}|Rw%N1_>%NMSFg~GM3*y1a-_{xP_f0e==&s7UI-fD$wU%hbc zYZUJMUbArR-CBID!i~3f;rg#rxb}4m*S=nhuixU`TYQ7Uou3=F_(p{rf8!S4r10$- zuT2X#zs(A_{^o^i-=c8qZ&|qa&#hW~>%u)h+Z1m7Z40-(?OJ^M!u8*waQA~dw)jqk zoA1tr8-JG;-?eb#?^d|=cQ4%fdlasH&%&+0SK(gYdl#;MkHU?&PmA|#@m?+7yKvX9 zeG508&`7)@Vo#&KV+W*ZmccL>;hzBc#Za^KH5XZnHF^1kdM{0~J_*T?Jdu*N6va1Oxd za5VLS_*kbu*m~-TeFWIpc|WAzk!b2iH1&=Gt6AUtw5cWcqrt||=JSGF+tI{22JADA zHlH2jVxJqH#h(0+MYsQXFKYhBp{d8`_{OL9@dPyW{O;z7VC$);-brBFvyZkd*H621 z;^f9|T2;uIBFwCB{W? 
z^~CTwPAxGm2GA+-wWLe)|NcvH4pQ#4cC|3!1pyd`&=#0x_&#@ z|BFZ6SU!iV8S4&8`(&9u-3j*k&-@z-SMwbEdo6c?@800wVcx~dVdtJW(_v2jB_9Dki9L^=N<7&N^ z!QOwY^9tCHb<|(wc#Xq4V#mY#;p<@Y%zf7D>J2pY%!)7(_9@)H8Q=3L*Ej8d2DbgoP22w*O+D>@0amN+e+jSce+Ac1 zy|(|gdK2IF<@!2~BWdd!uj(w=Eg5z@2A>)A1L?!iYW*QJ(hx0H;0QC(6^_pJ3awjyCK2PEtMX{ROtY zItTtnQ`g^jkn*(m4>;}lK2o0cI%XQxo^`ZY-*=ko)^`j#fz{IfXmH!F*TCp#>bZ8u z09#Mpd1zeEn|k7o3AS&EI~Kg%ud&h8)0c6;#z{Nl!i}S!wzQ+3cE$sH&eG2K@b>wd z08KsZObD*+OawQMe%jKGx;T6EIwzf*a@ShtsOxLuPfWe6)xHy!XRV$Dya{Wyx@*&< za6hh1+9u_XL;&R1#XYa)M)CdKMh#T`oB?M zZlAmdPYX6?_;hf6@^|s4hwGzm{ok>v#eW8{n)&!lIwQP&pPva$J^v?17qIoz9jlos z{WwXPU%mY^&ihtJjdEx4b;ro5HoWuFR?Q=LkntF0u0BqlC z{}x2kPn-F8-7f?-ey+8J;cE8D_8;Xuxi)u2v%Yrw<D zt_!uayBOGK9na_DU_YKu^(8peJfGs!SrY7cT4yP6>ZmWxp=KR%@>&LLUK!hE;cE6R zahHSpOcuU8+-Ek%a{Zaw|9*`H4IM?G~`1goXpmEg|7jO)s9eQZ~oF|@0@ek6BG zdHQdeHdX=q%zj0)-&+-~=I_}%CaZz{IF8y@=TLJ@#K~<9ux)2d)`Y8>n{m2<{TN5v zS{!P|5hu>t;H+<>(|5TxpBdHx`Y3x~fz|3d zw?3M>{$pa3$G>~yU+>QwpsDLW7XI@1Z`kiUn3zdU{27+lwnP2j`mXT82Q zMbl54F~%W=T4HPlHm}V4wgBJNjO7H_gZ+4})wki;mcw%`PMz(*zNbk}+r#Vp-T|(Ub4Z)*YFBqY z+m11fotQgh25ndeAlnr98I%z~-rKN=o@se0Syz z`mjH^?uuohh{$!#znKuzLz^(_)h+eqXTn9&OW6%Ei-zvp=w{gVBwvZ3aqt#`6%c z-+@!tJ|m@C`q!`2oe7&fbq@vm9XNICcA->Deusfw%e2i*DHqQIPJZ5h4@b{An-!bf z|8LIcuKwTwjFWx0kM_+RZOfR(vQF|n0{^_PnGIc@e2xTXO`jc`+;!V_jsl-YJJ!*b zwe)Co_ak*JJqArZd&*M^xV?ut9!))UP5`TAZ+RlvIO@i6|9BF34s!86;ke25 zOFJinZKoIZ^l<>1dfFKXR!f~z!0p^lMN?0m)4*!U?R2nl)RWs8U~|hm47q;haV`0r z+1R_of21#G!PV@?oYa#W%b!u64fYz#{mtJuJO@o(|GBWq(}!~#|Jv{K(A4#x8-KaE z*!KBg+tfA>r998|7l1G1unp^JkL@D#i#fdhyv|&EU7NE$TFSdox?Wn!S8KRs&dVj> zx^`a*SIfTSGO!=lVQqsr)Ql@m-!BKZ=i?P<>N$T`g4I&zDscOp4@Of@ovXoWIp^1a zjiYWHf0lSH*ykCq1vTgBb!h6DqjKw}?d!qqIeG({dfFZWR!f~5!R>r+LQ_wjo550_ArN<^Hc1+zl(m!)#aJLmhzb!f6M%AXE?b2Zu=u} zHFJs2qb;Au;A%tgNxqN6t*gy^pP=+(zS^GTP%}Sq#_%a{?g{gfzdZN1r@{XIv3kbp z88mh8U$*ls*pKaKdyYfRcEoAtd2rg9k9hL5^8(o4iC1?H_})g%F-t5B71LwZFAa&(w?{#q6 zTL_yx&qHs3{oBaa(PsY^pj6K_@FrNzH6$_LLQ~g&VQljFzuowIjm7^RG?oyO$srxb5zG_>VQXbnUVDr?r45d7K-A}>pbwgYcb3O4Iwa&8CQBT`nfF0AUOJBm(^v}BV6?4CB~0n`=V_{N_lKQS*NkB zL@AH$XKpKbL}r+pKFb0J?+L{fl`}z6>xp7{T2ORJGS4ze!OPYf9Lpv!)sP- zea9+mzUT2zbf0k(TkiT{EbCd{-1Yqn?6Yfp<<-|Z@%(m9pVVj*OZDTd+J2#B!S~D7Yo~c&HCeOHz4$l2>4Q%o}Q;h-knaX(9({B7#DYdyC zxMvv?oO`-)z3-2Ormp{*_{+uJz?q|KH8^>WjgPVQTbojzYj+&5&sFN$*P&EP-Em9Z zb+O4)cRaAqRqEDVk5VmTFh1BdQQP{I^4KN-d+xP$rO>}an5U-fkVwXFHW2p!QMBM$4qc@bBxSgu21~CfHU{q3(8}g z8C>^*v%u8`cKo;I&I(sI&(xa@Zr`%MnH{d?^_acY9B}Juvww3^`mul7=HgJZe`42) z>;={3>A$7Cqw%+tyAQFxdk=Fvx`XF2Y?Dx&%6Mdy8hl%D`@YC=&hQ9-`1iF24E@rMQiKZUD6!;->&3%4pxIXH+HkJXaJLjA`%Yywl zceE|Xq2{?4XH1s|XH2&x4|&>M0i1j5cG%>3R#*{y3+t18+8&!c_uZAit8?CsXFcud zyEf-xatCMp)D8R~TIQE?buBb?WBEIl@{G^g;QIeJtpiuf`B@k2$Np$r zk7Iof`y)??`;P=hC^d zId}^Wb?3*HU_ZvxwiUV3V6u=CY;SYskKDO_{n!fL+_N4ju`A5ZPq>Bn)_b}WaQZHY4;$ARlO%grmkmf2ez4?cmzT+)BF+7J7a*e8PPd^rhj zTgmxkxIXIUJb==VIcpopp=Qov<6hXo`+Ttb2y<|MAUB8jTFSG&S!Qfc1^fHHxmHhu ztDW32Du>g-e#}AJ860ZnAWob!!JhxbISa0KZpVKQ_r<> zE}C()IUeUx`Y{)6=X0o;i#YS+0&n57=J(EYr8k z8z1|Y9Iil9&$+o0tY$wQ*Q>yO%t6~=j;lG$L2R6(J9r-h&b6`ojH7tkxE7mvIfi>+ zlbgGH=Ig+xPcrKJQ`+>|jZ)qE`dklI)3z6-+__=9H-OzUscYYpQZ0E60jp{2K`GC8 z+z8HdR&u=wO zz@FUhMN^N@ePCns#>YJ5`ouo0vDZBAM^o3oC-LQp{{T31r584N=J|u*hd8XGEp0rE zW)8KD;b`i%(HmcR){{rT{$8TGbNo@bALqEX$2ioS<6_(K=U$J4Z71jI+79*sY80?lqa1mh!78ZNuxy`upMMbtm4x!Rh z^lycT>)+j&xib!Y?!>+XZTLOIXS4h1=iv3it1Jj40fABU}9a7XP5d zKWy=jTKwY{|D?q~E!^+De%9ijxA+$={$-1QRk;26y2Za~@o!uFyB7bxaP#}2aO3}2 zxa0d%;o5&L-1@&1uKm}-XJ_L4R=DwgFI@jWTKvxz|EqB4%io3j9_F7G@0j_&`KrBB z;kGwg;oFn{=!Ki#7=`OUX5re$D%|>G7jApwwD`D%8*jYAtv`O@j^_l0>px-P_J5)l 
zpSZ;*Dct&#w)kX)`#t5!3)g>&!tL*rg=_EJ;#0Nw)P>uhX$rUgw1xY338pJt`}8e7 zLyOPY;xn~)mlmJ7aQib$;kGwx;o4^_T>I>WJN|PNu6@oHpQ~`=&0VmU zmoD6VmnmHPvMs(`i!a~eD-`a0TCs5Bt<>TxxA-b8zG{oF*5a$T_!@;f-`6bM_PZ6X zeXYW^uifJ7wD`J(TYtU69l!MpH(vL`wQo?k_6-YnK5taG_KjP7lfsR+Y2o^BR=D=f z3)j9yi*MQDTebMsg*zX&Y4L3fH~w}lzI}`DP`LM_9Se6ncPiX?I~Q($b}3x@t}VV> z;jZVq7jFGM3b+2Ag=^odaO>|~xYtjQ!u8*$aO3rC@m?+7yT$h{-1T6;!i~3oiyzSN zyn8$lp6_hCHPFv)dqI_=hL_4b1+=(JbdzVsYBq_)#iS_AEh7n^V$yO zP;;*;PWy*}-SfKNcAqMb?QpQUX>-picQ3wwM@L70u;2Bzjy8StRd-*Wwk#9x2(Wu< zecj{AeNHgmkzoIJhI*d)jzSxTZ4gKPcE-_Y`f4+dduKJhU?V@u_`03r#(LcjIiZ_0&`E9PkKY+eh1$>!;mo=v?p! z?8enDKNp;9=sbA-?#4*`2~uJ%}oaRppGF?@znON=YQHO5tNwZ}`0!Ep7&@VQGZF|G#J7}vnn&L}aig{voq z&uVIkaUHnExE`)HxWu>tuAUfQ5koC8hJb5~8{ukC5hLS#6Z{@{T}y68(^s44%I8G2 z#JvSv8EnhBc>LKX>Ef|1GDVHhJ2;2W;Q9`K&8<4ZoMt`-NqE?*n^{<^OCM z23H&2!E-LxT~(YuDU-&X#9Ae+2CB zq^jrs`6!yY*VACumQyJogBx3$aeRJPOB;`a?UOd!l52DAd;;v68Q&+1ug~@Ji<7VGnhhKxM>2F+Z zYR;wPXqmBn9qicZ>pKp4@_z%IbK?6Bd2DZjou}6G-G@B3x4^G7w!bLlxd*)s9)Z@Q zW0b%9g?G@@jpch1x&GZc@pB1q`@Z}hn!5h~;4hE=h{nI}5k{h^>)$~skN^8%<5&L= z(A521e((Dqf}fyNH%2E)HGfxaB>G3-XE~RSpX20s*l+t{ezs{H>;4AUKV$VVICEq) zusn0*6R>kc-Ff#Zr61>=w$C`!oOfblI=?>$JEv`L46xjo{tV*_u)n9Iu6=Y$wdC_9 zIQjUFP@cG7ffIKuusqk@*I<8#N!_?(QmQ%5TXbSg1>07}*}3{HTwVXSX-jTf=J_31 ze{Ew^%5%=X2fM$@9P%9ffTphhIF$1E{|MGUZR`INn!5huQp)51GdOcD+Aob}jQkGM zuW0@bllj@EajdJa_3Za=;L$mHQ!iuuJ6t{Ym_NYnd(59`-oLcP?=P@*wVC&Llxm6l zH&`wFA8@^wbF;?N9c+KJjZZ0e4mj_|0Q)mA zb?vSzY8i_$o4WhpoA$>-Qx6{-?DItWJPuqR_1s^^1*^Lb+Rg;Dr?COF$z@Won)iIy>b)r^gR9%m)OAgt9L-$g zHw9cRe(qnbKP8$r8MkS{ z_4jk8gR5oSrU(0R+_cTWq2{=W)Ao#DuYcQcKQH(G6JJZY=fS#;eR7xyTyy9GS4$2v zgZ-F;wplo4^_K#xS$|^c%Wcbj$I@V9hA#uxC*R>)7Os!F^(Vom z7XRhIYUZ<1Cw$-|$S2plZLDBjj{JR<6~We1cdS;T^y64*TbV=6u@c+vq_i;rpY(eb z^me~jMN`kZxf*y7bv*JnS5`;UPg|aq*8rd^VhaEhno3|jpMqx4mf+sDQH8k&HKi>;Or}=#3s-0I;;ow?^jqyn>kKSsh;sz zAFP&fx4rIY>iT!aCXfFHjengt8=|S}KNbG+_;1wsXa8XQjnUNgpBjI8`nn0&`)1C? 
zrtsX~b1pVR(@$IcHV3N@C4ScZE#T^|e~G^(yvE-O-j2UDnts|6e;cs6b1C<)ZQ<%Q z{&sNtlzQ94)oQ&R;Of>J%=~w*>c_ALo+# zt{iI4C2{({8`%9~^4lG*b^>*6V_M>=#eWa*^Z09<9-BP-);+;~A5LBS43uiAyH}|@ zBQ|;J?hW?)aO&2biBc_fdw`uw+PYB6W7`L8p4w)nl;`_!J;8n-&RmRXU48VOj#9gL zR`3w|Y)r4CUg*y@{hSS(Jni)cyPvn-?AYY_p3=Txzo%q8>uJ|-7D{c#a4p*p?0Tnd z4oZ3A?+$8)hYm+~-LLCVe>C;n?~VYgrOuJy_PydL zH1*Uu8myN4-Z5a~s2j(7+p*xTu5&HI{qB zo76uYO!oGR!?|Gnb5A)Bu9iLZ`Cvb;yV@?`P&1A=eZ3Ie zo_`mispouM3|32>OTg`Od?}iG>Rbj^%Q+qdHjcV+e(%J4FYt&iqvpPvbMgu_^~_1R zb<_5h;P#xn3Qaw24+g8H&eh;{zSp3sr_Qxtwd8vp*f{FR_j+(U-y6`>Gf(B#^*p)0 z4gq_9)4v)v80+_{+j$6au>q14Ns;cmEfwYe|2htiMpQ`@~9 zYR*rw^ZtHf%GKqWzn1cq8h^_?Yu*R0f9qiwT+LkKbAQX{0l3-_e3I{jaO-L_--jsu zn6I{nIfiqXpEzUq2-s(;jNzkjHSb3m!^hz2xmF(s`!Tk*CpezuFt#|cp91H3Z3)Ik zo_pofVE?{@y64?Fq?WqRfYrKpjNLJkww{HnoeR!kz2ETB=J=YIIhcz!-!1sAq1yD3 zJ3jh4CT-u?EcNZ$_*y2v=fLUPlJr4t-%h2y=fVCx33boK3vfS*RWF0{e6kESd7e*R0sDMn9c|WMic&q-*{k}ac^#*{*U;4UUlyA@{;xOw zUdQo&15I83RQ`dia{N;)DHn{Ge-;r}<|GWbJYVm&;oc;4k*yNc@ z?}6PvtJ~JflxnFvqSRdln>=+#g55u>TX$7TwbXrI+}Kv5{Fif1+^2nj%{;BQI;A}O z=MTZ|pUuUX*40Pf6)CmbhV$hku+O)7_wzAa&Fjr|z&dMCM?HCc0(MMu-F^yJ(?8ek zXK;1n7-LQR)za?gV70{j0&Y9W<4d?ct_9kXhkELK1+M4uYxpqy2XW*%^BXk%v>Brt zZKx&2x9Vux)~1xl_8nM%ZR=3VWBVTL`mb$WZ1Q}s^#`zjPr`WC)2`oIl-k7WfqjQy zJkP_A=-Rzze**jQnpOXq;};IES+VsUtK3(t|10|EUH*$Lcl|Jy^{k(^egpe+^yDG0 zzSfEF?_hJy`;R~1YI*((>A~+*XF$b2b_EE zhS=nJPV1O;)N`8gtf$@h>r-lTJ#b&t2|S|9f9s8Vi_y^3_1_48xp-r6=IAC3PM)LV zV{H94rIhE|9RuvMn!5JQDAiJT%u;uAZ1U6{3+%I+x^=gpRLdBQ4R%e`wk4%JwsF9o zdu>}$%3~W>+}O6Jl;_!PJh0Dh_T4_)S97*)+c1`O97CT2#>d~aB5SJ80TZC9+x9lt ziTbozdZZ@Nx_}FjQYJoV`5g(cI@H-V|s)Pw21RILTwmhG#A73|F%qug|H#ew_2#rshy{&WjUg z8u0sF{>x)pxVbq-<}TMK{?mao_dmc-9^3R_*PE=lGr-jb5-WMm2v^U#;Po^U+`eVM z*#)lVTAlsd%y8>!vwyQt`mul7X5~<`e`42)`-v%6m#6=h@~i=t_OX>*Jhp!DMyYg>>*%`p`x z_CnzIyZje>VYr&HjnftE$2i&+;ZQS0f$Pt)me$5m=jJkSe?FErmW7XmXAiI(+6)ebfgx`|4G}#?t1vuSV&|ao4swhnnLq zPCjdZb4{D4-0LI0mhoK^T>H|kshhs61=mO29M-1vV-DKZ;ZQRNar(C|IQLuoEzdbS zjr!)3dX}lT9ysssc418A$#s2joCsKnb4 zZoJITjo@m|&-iTI_+)-;0ymDn+HA))MBP5+Iz%u>b8vG`8GC#M0 zs~O*ZYzy{dKeTPfp=LkCiL*WU{VxBlb34G*hEq2_JHle2VTrK0f7ub(|(zZ8;nsLQ>Z`}htsLQB+dH=S( z_*tfJ`+!Gaw{OXzCz^WBaWAl1*7@FGKjxrqUyl7a%t34%=j#68%+)>UqdaXK05&hj za8GPKRUVY_|6{_cyq_Pr?8l2>1_nznr? zjQr>iYM@U!Hdk{lI?z+c?_v=|QQUz8(s;uW92j_z3vEd(GoSG+S0}V z^bzQ_je%(Dwy`h1@~kJPfc?D*b=Qtl;eMRs+D_w8bB>E`=l4#`Q?TviT=_GOGtks+ zXFuY}ZRZ&L&jde(zq;3CVp__*wrs=e$@=@_m$l}A2B+_5;qwr&^wC!98^`f`hJ4Nj zkHF9Ge~at)zgOst1D|uym!Q1?ooKJY>yFOPz8bs*`uPQ4Q1FFdbH0eu?|fVDVlc0h z{x83zaKB@HY2n**JzrM1ISeY?wl6PS`xS*-|H>A>s_<_34{q_R3!j}tuPNO4*A{O3 z*R}Zdh5MX;L*ceJq{VM+@tX?YhWu|X-1cuN-1cuRd|Lc(D_r~Sg*z|rXt;C0y}_Mu z&!uz9b0c>So9j@pTIR67lX(|hT_5{*cjJ@m$@dudpsDA(JokdFr=HmNfsLK@!q~&$ z>PHY;em_{v`sSxiExA7cHikCmkzDK?dY1ah@j~0YIe;&Shm$S~|u5~@{`|dq^@0l|*=iGBIpN{`?#co{fa@VbO_!(z7 z-1U=d>Ikq}?K2YYx{A*zu$n%($9W!ZTW#)BqdEF=pVIaMZ#CDW*!*4-Uj)0Zt}do6witftL%D9^R{Rj}7$b?vTOwX}T=oVKo6 zxnt_Qd>!ohuIJ7haCKw3rseT}6RdwdkKcl;>+hPE$Nz0`=4P~aie`-bw)kB%e_L#Q z<}{9N_09Oc2exnKaqp04eBTFq?zOp($i?m<-q)GiHTD5KbIi8R-w)B$^>>evr`C_a zsb7EB(ik*#{oRA)@&6dCe?2~*z}5A4kCMm#Q*hJ&GcfuUP`V{>_36+cU{NN zaP@q@{R>zvZGHvYrp`MSuAVmIz-swU{5P<1)Q#g@8xOX&>}PWQlIM4@c`{e*-yd-G z!`o6fDZnLG8iMN?0`a@!{NBye+Q@3-Db z(bSWBGO$|Ov;#Nmog7U)ZKeRLrQRvQ#!*kbQ-K}Jy56bL)N?(M+ctA}8nAOXwX}yD zBmQ!I;y*3eJv#o=75{vOkn0os^x#3Iro@;5O+B9>W(2F%pCM*~TSq=a$n{IynZeDt z9njPhwbtYGyAI9Ivm%?4L@Uv%x>OD$^2KRdX2&gMWhOIP<{O`d~Bvvu#?RPtJ>Gd+pXVls0M^*G^!y{El}%xY|f! 
zr0)6Qj%%JV$o13ioL`{Wv)=q1(HTwMocGaIp3e~rg8dw!?tTAgj$ZRUTGwdWFNBXd zz3&(2zTdUszx(#JFuM2s?g@*4jj<@l{OzX2z`2jtXYsV<`eR6!4 z1grH1d#|z-*q8Sz+Lq?6W=?UQ*DeEImUoX5cR8?{In6Im&gH>s;VXcx;Y@0A<`lPCh_~&vr?1$KJ?SQ6!I`i9nJA&10YfNow ziP;TY$J_~?8WMA7H1)*X1*~RUV`@`NKXwHhqpoW=xO(#L4tDH%l{)u;>!WT=$6C$( z%zo?%?uWm+cF&DkYTOH~=J^)qeEUr0{c^tdM$h@)r(i$Z?_02IZ$Gf}V1JI82M2&N z5A;8qieXIA}bKvTU-3zQ%_w8JG-M8Lw{nYEeod>sX#`nC*^-cahVDs14h`w<3HM~a5LuRXzJGMT9s$MTmg2E z(B`_8d+qUl=1Q>t|9abK)5o>1o_tq<%~!91tKsVUyVm8&cMUlCT=(+i8w56=ZM50m zy+b|u27}F4&x>o}>iWA^$dm6naPqlt$dm7Su=#AG&Gzm+>b7?bZUCz#|BYbt*X!aY zxO&#}&0yQ9J2%tz7I=Lw<@zSqtzi2b|J%ULzTS?e9-krL#7v*>KvPdWcY=*=TWx83 z7dUP0yIkMYb2r%Y9shg4)>EJBd*SNw846C!)N>!2dg{3!Y;4!0P$V|5317_Rhz^ zzT7*tJwb7m$o&sA#eh>3B zSYLJH%Z+a>@`1EZUw7l$>9ZhT&tUi7z-Q~v7Hq$tEA5AK^nO752ypHP^cmUUqbjcd z^MzY;3%N$a&5=I505*^3$XN2+_q+%mK%MIO8!0b=)&2cJ3-MkC8&lmF)~A;EuYeQZ z+T^L_Rj~fHvrf6|-LZKM?C+&*qs{SC%lLV3)wzt{>*%?+dIRj3zsWK8P;Y_jF?+kg z-)Zo78~nWnf4{*$Xz&jk{G*EdIdx3MJ%=AR_$LkiX~q3K{#nIs|9Qo|C;Oss@3~x4 zU&6oQ?fuf%9DTVi)W6}a=9r1I4!#9v9c)$ZgTF&l_vJZrT-CDXzXv=1?_tmR{{gO^ zb^0S%tuH>w^%LA&+LG&Mu(~gEdCt@l_ZP7F!+!nqXMlCh|1y)Oqe}mQZbxz6S z`w!R}!~X^AlbYHr$eMtw=N_vqntJxYN#wjd*V-oKJsEG$wK%ccf$Ln8!|PmA!1YmY z&-!$Hrvx+g+HAyId&W`SJ>J~rF<h>vlJl1`*ugNh9yBl5xR2mh-y?TI*Ou7xf$P}w!;P(t}U?_1Shukh2X~4Pn)sF5L-QUFAPrX`aX3LbZv>fC^)g5|BJzmt)Dhy zyN|0U_Tu2guAfINfvzpFmju_bmx3ExKW)bLnxLN8OM?^J=j@5K47#@PWx;jq<>1EF zPn)s5zNj19>&x=s#Ln|1pZTtUt}U@w1ShukmEgwKPn)s5R;j1%F5txWIeucTjIJ&G zf8fM+yjOu6TR&~a_PVE@xxFgb*!A^iH8k~He^v*p^(B@uyjH52*K6e(U^Un3nqXh9 zRrR%atGQOisda6z>nwGw15X|1kn0owu3+bQ#$?^%pS5EfHT}|NJ+SjDZPu^aVpHo&4R1iH5P9a;Q(7C-A!(=R?p6(3_|A2HU^ z=-T3UOz|_8e#TPM&wHh;6_4`lqaNiQIl7iS%JZ4TWBrW4e=jw1F1|bGcr3nM$m94O zSK;F;d;-|GCvx;N(ALi*!&nd2a2A?= z+S2Z9u(9&p?>S&KeUnEm{=LBZ+edNw=$OwtH~-;vE_%kPcfpSTd0_k4hhv^q^aba0 zcZW%Nwp($>`GN-TSGe^!<`=@vVZY@Ufj{N=Byanzb}^cM+R~3pz}Ayz7X86$d1f&H zY&&(w!8+BF_foK$HfxmUnaE{e|2?C6_WXfh`Jx}7Cr!m-;<~+GJu15DfIqugK?7lguVE2i^6~4B@*A?t}y}rUX zfIY`Ia`YVgebi0h%wzN43|F%@accA4s!N$e^|QTO@G<7C9J4lV11F|)`*yf~o)2yE zjNcHje^*#N*X}#O>V0Wz9rC)4RVGr$o%mSCT^v)#-Qc>8d*J$6hcZxNW zSpBk6hdg!IuWie@s`rii@X5Y$KiHZc;AlVm3|zM#So$$LMW)SzrOiVevmPD>t7rds z1gz#gpK;}N+$$#%_fdST)q9%g=d^7~xxn0YYp zdJ5fsJx)DOgU$a8$2_Nb7Tk+{JL~2-xPDoOa(!YSUhG*{a>um3zL@{#l79sDp7DPTO+C+_UI(k$ z);X$8Eiv8zTYG(f_a3OPsI4)?3H<2Cg2TZ^6xaze7_`z2Ae? 
zY-@gPYKid!xEbR|H1+uWRD9}MenwMIo?pOfwl$A7wdT6=Z&rD2`W2h&%4^fug1y#^ zD|kHkH?Zq!JjZ4n|2?{KelPxB*ZwHjIDc07FR*d`=GcrgOQ+U2{}g}g`M1LUP$v6o zTd;8^;aJD#>{yuAXbglwdXYCHF>o_C|B<-iiP4 zs&h?+kF`zBF}bDztJix{&&9=tWrk0pP?7*t*hqI(Yg_f88H?Alov?D<@T zW6tNI;QD&DSc5O#;7c_4k`2C8gD>6S%QX1175ACIauxUcuH_4N%v>WY!1I0AieO(p zw^3h-x0-zv=R2q_;34?=y>GsQS{Y5b}h77^`J&tpPTF_?lpS@?FweV71(1tqrz~y74?m6Ev{r=Th=yj;#Z) zzwhb_R~w8^zVBKW-UqHN^LssTbaQx~)l%d7V71h^0a#66=YTxE8-lGdd?TCeVs zeblpGZ30%$cSDVmKIJvrk>s&j*>s&j- z^-)isb^+TbzwgqXKB?QM8H*5-AmNdW?!&-TKIn8I>!ERW9X;N816snsqFx; zwbkqVK)8D19t5uA9t=0Ge%g%dKBjJ5_pw93#?5`H&$|zWt0!)EaN=q|3~pTgv>Dg^ zP(Ahb05|JB98Ep@(h*=a_a$SvhpL&^J@iPhn&PPcd^Bjs(>oH)*KXn`n zwvO(!HHTcE_#X#$thITLv9hVcqjzh-wByh%7f4M%+JPLI+3=cO52{r-#Ktv@jt!c z@3myv&aJtY;u>c?XW-*C-Myi{ruWCsS4)Z8(vIhV1^apRtO}nEHr6>DbA9gxuCMRs zHhAv_Kd-_2G=d;$;U^UyCUz=KDTmx>#7=)&tF>4$KYDHKI+Ewe5)nqbzsLJ z`;`5>9Nq#S)#GzBxLN-#XzCe*Tfu6!HNQ5s#JCOI zjBz`fdd6S~xE_N$;QFW=(>bk{n0JC51NXAbgS+7B`a9?48H2mQj)8HsCC)uy$DodL zFI+u7L&44Z??Y417~Bt5v#t5HsU^k(;AV^m(bVJfQ1PkP+{0+<$@2(U&9>&zrdIEZ zUN5}g_BC$u*8SyCd@|OLf$!$Cb?$o}hpT;of9@NefUDegXPkz^jZuG28v$2OA4h`g`7{cy ekGkV#4z noundef [[vert_attrib:%[0-9]*]]) - -; Reach the end of the hs shader -; SHADERTEST: ret void - -; Make sure that the GS shader does not have the vertex attribute. It is not needed if it does not call the original -; vertex shader. -; SHADERTEST: define dllexport amdgpu_gs void @_amdgpu_gs_main({{.*}}, i32 noundef {{%[0-9]*}}) - -; SHADERTEST: {{^//}} LLPC final pipeline module info - -; Test that the vertex input is correctly loaded. Make sure that %VertexId is used. -; SHADERTEST: {{^//}} LGC glue shader results -; SHADERTEST: define amdgpu_hs { {{.*}} } @_amdgpu_hs_main({{.*}}, float noundef %VertexId -; SHADERTEST: [[vgprAsInt:%[0-9]+]] = bitcast float %VertexId to i32 -; SHADERTEST: %VertexIndex = add i32 [[vgprAsInt]], %BaseVertex -; SHADERTEST: [[TMP38:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32> [[TMP37:%.*]], i32 %VertexIndex, i32 0, i32 0, i32 22, i32 0) -; SHADERTEST: [[TMP39:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32> [[TMP37]], i32 %VertexIndex, i32 4, i32 0, i32 22, i32 0) -; SHADERTEST: [[TMP40:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32> [[TMP37]], i32 %VertexIndex, i32 8, i32 0, i32 22, i32 0) -; SHADERTEST: [[TMP41:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32> [[TMP37]], i32 %VertexIndex, i32 12, i32 0, i32 22, i32 0) -; SHADERTEST: [[TMP42:%.*]] = insertelement <4 x i32> poison, i32 [[TMP38]], i64 0 -; SHADERTEST: [[TMP43:%.*]] = insertelement <4 x i32> [[TMP42]], i32 [[TMP39]], i64 1 -; SHADERTEST: [[TMP44:%.*]] = insertelement <4 x i32> [[TMP43]], i32 [[TMP40]], i64 2 -; SHADERTEST: [[TMP45:%.*]] = insertelement <4 x i32> [[TMP44]], i32 [[TMP41]], i64 3 -; SHADERTEST: [[VERTEX0_0:%.*]] = bitcast <4 x i32> [[TMP45]] to <4 x float> - -; SHADERTEST: AMDLLPC SUCCESS -; END_SHADERTEST - -[Version] -version = 52 - -[VsGlsl] -#version 450 - -layout(location = 0) in vec4 _14; -layout(location = 0) out vec4 _18; - -void main() -{ - _18 = _14; -} - -[VsInfo] -entryPoint = main - -[TcsGlsl] -#version 450 -layout(vertices = 3) out; - -void main() -{ -} - - -[TcsInfo] -entryPoint = main - -[TesGlsl] -#version 450 -layout(triangles, ccw, equal_spacing) in; - -void main() -{ -} - -[TesInfo] -entryPoint = main - -[GsGlsl] -#version 450 -layout(triangles) in; -layout(max_vertices = 3, triangle_strip) out; - -void main() -{ -} - - -[GsInfo] -entryPoint = main - -[FsGlsl] -#version 450 - -void main() -{ -} - - 
-[FsInfo] -entryPoint = main - -[ResourceMapping] -userDataNode[0].visibility = 1 -userDataNode[0].type = DescriptorTableVaPtr -userDataNode[0].offsetInDwords = 0 -userDataNode[0].sizeInDwords = 1 -userDataNode[0].next[0].type = DescriptorConstBuffer -userDataNode[0].next[0].offsetInDwords = 0 -userDataNode[0].next[0].sizeInDwords = 4 -userDataNode[0].next[0].set = 0x00000000 -userDataNode[0].next[0].binding = 0 -userDataNode[1].visibility = 31 -userDataNode[1].type = PushConst -userDataNode[1].offsetInDwords = 1 -userDataNode[1].sizeInDwords = 12 -userDataNode[1].set = 0xFFFFFFFF -userDataNode[1].binding = 0 -userDataNode[2].visibility = 8 -userDataNode[2].type = StreamOutTableVaPtr -userDataNode[2].offsetInDwords = 13 -userDataNode[2].sizeInDwords = 1 -userDataNode[3].visibility = 1 -userDataNode[3].type = IndirectUserDataVaPtr -userDataNode[3].offsetInDwords = 14 -userDataNode[3].sizeInDwords = 1 -userDataNode[3].indirectUserDataCount = 4 - -[GraphicsPipelineState] -topology = VK_PRIMITIVE_TOPOLOGY_PATCH_LIST -patchControlPoints = 3 -deviceIndex = 0 -disableVertexReuse = 0 -switchWinding = 0 -enableMultiView = 0 -depthClipEnable = 1 -rasterizerDiscardEnable = 0 -perSampleShading = 0 -numSamples = 0 -samplePatternIdx = 0 -usrClipPlaneMask = 0 -alphaToCoverageEnable = 0 -dualSourceBlendEnable = 0 -colorBuffer[0].format = VK_FORMAT_R8G8B8A8_UNORM -colorBuffer[0].channelWriteMask = 15 -colorBuffer[0].blendEnable = 0 -colorBuffer[0].blendSrcAlphaToColor = 0 -nggState.enableNgg = 0 -nggState.enableGsUse = 0 -nggState.forceCullingMode = 0 -nggState.compactMode = NggCompactDisable -nggState.enableVertexReuse = 0 -nggState.enableBackfaceCulling = 0 -nggState.enableFrustumCulling = 0 -nggState.enableBoxFilterCulling = 0 -nggState.enableSphereCulling = 0 -nggState.enableSmallPrimFilter = 0 -nggState.enableCullDistanceCulling = 0 -nggState.backfaceExponent = 0 -nggState.subgroupSizing = Auto -nggState.primsPerSubgroup = 0 -nggState.vertsPerSubgroup = 0 -dynamicVertexStride = 0 -enableUberFetchShader = 0 -enableEarlyCompile = 0 -options.includeDisassembly = 0 -options.scalarBlockLayout = 1 -options.includeIr = 0 -options.robustBufferAccess = 0 -options.reconfigWorkgroupLayout = 0 -options.shadowDescriptorTableUsage = Enable -options.shadowDescriptorTablePtrHigh = 2 -options.extendedRobustness.robustBufferAccess = 0 -options.extendedRobustness.robustImageAccess = 0 -options.extendedRobustness.nullDescriptor = 0 - - -[VertexInputState] -binding[0].binding = 0 -binding[0].stride = 32 -binding[0].inputRate = VK_VERTEX_INPUT_RATE_VERTEX -attribute[0].location = 0 -attribute[0].binding = 0 -attribute[0].format = VK_FORMAT_R32G32B32A32_SFLOAT -attribute[0].offset = 0 -attribute[1].location = 1 -attribute[1].binding = 0 -attribute[1].format = VK_FORMAT_R32G32B32A32_SFLOAT -attribute[1].offset = 16 diff --git a/llpc/test/shaderdb/relocatable_shaders/PipelineGs_VertAttributeShort.pipe b/llpc/test/shaderdb/relocatable_shaders/PipelineGs_VertAttributeShort.pipe deleted file mode 100644 index fe27bb89da..0000000000 --- a/llpc/test/shaderdb/relocatable_shaders/PipelineGs_VertAttributeShort.pipe +++ /dev/null @@ -1,80 +0,0 @@ -; Test that code that merges the VS and GS generates valid code when there is a vertex fetch of a short int. - -; BEGIN_SHADERTEST -; RUN: amdllpc -enable-relocatable-shader-elf -o %t.elf %gfxip %s -v | FileCheck -check-prefix=SHADERTEST %s -; SHADERTEST: {{^//}} LLPC pipeline patching results - -; The i16vec4 fetch comes in as a <2 x float>, and passed on to the vertex shader. 
-; SHADERTEST: define dllexport amdgpu_gs void @_amdgpu_gs_main_fetchless({{.*}}, <2 x float> noundef [[fetch:%[0-9]*]]) -; SHADERTEST: ===== AMDLLPC SUCCESS ===== -; END_SHADERTEST - -[Version] -version = 52 - -[VsGlsl] -#version 450 -#extension GL_AMD_gpu_shader_int16 : require - -layout(location = 2) in i16vec4 _8; -layout(location = 2) out i16vec4 _9; - -void main() -{ - _9 = _8; -} - - -[VsInfo] -entryPoint = main - -[GsGlsl] -#version 450 -layout(triangles) in; -layout(max_vertices = 3, triangle_strip) out; - -void main() -{ -} - - -[GsInfo] -entryPoint = main - -[FsGlsl] -#version 450 - -void main() -{ -} - - -[FsInfo] -entryPoint = main - -[ResourceMapping] -userDataNode[0].visibility = 8 -userDataNode[0].type = StreamOutTableVaPtr -userDataNode[0].offsetInDwords = 0 -userDataNode[0].sizeInDwords = 1 -userDataNode[1].visibility = 1 -userDataNode[1].type = IndirectUserDataVaPtr -userDataNode[1].offsetInDwords = 1 -userDataNode[1].sizeInDwords = 1 -userDataNode[1].indirectUserDataCount = 8 - -[GraphicsPipelineState] -topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST -colorBuffer[0].format = VK_FORMAT_R8G8B8A8_UNORM -colorBuffer[0].channelWriteMask = 15 -colorBuffer[0].blendEnable = 0 -colorBuffer[0].blendSrcAlphaToColor = 0 - -[VertexInputState] -binding[0].binding = 0 -binding[0].stride = 8 -binding[0].inputRate = VK_VERTEX_INPUT_RATE_VERTEX -attribute[0].location = 2 -attribute[0].binding = 0 -attribute[0].format = VK_FORMAT_R16G16B16A16_SINT -attribute[0].offset = 0 diff --git a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_16BitInput.pipe b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_16BitInput.pipe deleted file mode 100644 index b0d986a292..0000000000 --- a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_16BitInput.pipe +++ /dev/null @@ -1,63 +0,0 @@ -; Test that a 16-bit input is correctly returned by the fetch shader, and then correctly read by the vertex shader. - -; BEGIN_SHADERTEST -; RUN: amdllpc \ -; RUN: -enable-relocatable-shader-elf \ -; RUN: -o %t.elf %gfxip %s %s -v | FileCheck --match-full-lines -check-prefix=SHADERTEST %s - -; Make sure the vertex shader truncates the first 16-bits before using the input. -; SHADERTEST: // LLPC pipeline patching results -; SHADERTEST: define dllexport amdgpu_vs void @_amdgpu_vs_main_fetchless({{.*}}, float noundef [[input:%[0-9]*]]) #0 !lgc.shaderstage !1 { -; SHADERTEST: [[cast:%[0-9]+]] = bitcast float [[input]] to <2 x half> -; SHADERTEST: [[value:%[.a-zA-Z0-9]+]] = extractelement <2 x half> [[cast]], i64 0 - -; Make sure the vertex fetch shader zero-extends the values that was loaded before loading it. This needs to -; corresponds to the vertex shader. 
-; SHADERTEST-LABEL: // LGC glue shader results -; SHADERTEST: define amdgpu_vs {{.*}} -; SHADERTEST: [[VERTEX2_0:%.*]] = call half @llvm.amdgcn.struct.tbuffer.load.f16({{.*}} -; SHADERTEST: [[cast:%[0-9]+]] = bitcast half [[VERTEX2_0]] to i16 -; SHADERTEST: [[zext:%[0-9]+]] = zext i16 [[cast]] to i32 -; SHADERTEST: [[result:%[0-9]+]] = bitcast i32 [[zext]] to float -; SHADERTEST: [[ret:%[0-9]+]] = insertvalue { {{.*}} } {{%[0-9]+}}, float [[result]], 18 -; SHADERTEST: ret { {{.*}} } [[ret]] -; SHADERTEST: ===== AMDLLPC SUCCESS ===== -; END_SHADERTEST -[Version] -version = 52 - -[VsGlsl] -#version 450 -#extension GL_AMD_gpu_shader_half_float : require - -layout(location = 2) in float16_t _8; -layout(location = 2) out float _9; - -void main() -{ - _9 = float(_8); -} - - -[VsInfo] -entryPoint = main - -[ResourceMapping] -userDataNode[0].visibility = 1 -userDataNode[0].type = StreamOutTableVaPtr -userDataNode[0].offsetInDwords = 0 -userDataNode[0].sizeInDwords = 1 -userDataNode[1].visibility = 1 -userDataNode[1].type = IndirectUserDataVaPtr -userDataNode[1].offsetInDwords = 1 -userDataNode[1].sizeInDwords = 1 -userDataNode[1].indirectUserDataCount = 8 - -[VertexInputState] -binding[0].binding = 1 -binding[0].stride = 2 -binding[0].inputRate = VK_VERTEX_INPUT_RATE_VERTEX -attribute[0].location = 2 -attribute[0].binding = 1 -attribute[0].format = VK_FORMAT_R16_SFLOAT -attribute[0].offset = 0 diff --git a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_VertShortInput.pipe b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_VertShortInput.pipe deleted file mode 100644 index ff067c559d..0000000000 --- a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_VertShortInput.pipe +++ /dev/null @@ -1,68 +0,0 @@ -// Tests that the 16-bit vertex inputs are correctly loaded in the fetch shader. 
- -; BEGIN_SHADERTEST -; RUN: amdllpc -enable-relocatable-shader-elf -o %t.elf %gfxip %s -; RUN: llvm-objdump --triple=amdgcn --mcpu=gfx1010 -d %t.elf | FileCheck -check-prefix=SHADERTEST %s -; SHADERTEST-LABEL: <_amdgpu_vs_main>: -; SHADERTEST: tbuffer_load_format_d16_x {{v[0-9]*}}, [[vert_offset:v[0-9]*]], [[vert_base:s\[.*\]]], 0 format:[BUF_FMT_16_SINT] idxen -; SHADERTEST: tbuffer_load_format_d16_x {{v[0-9]*}}, [[vert_offset]], [[vert_base]], 0 format:[BUF_FMT_16_SINT] idxen offset:2 -; SHADERTEST-LABEL: <_amdgpu_vs_main_fetchless>: -; END_SHADERTEST - -[Version] -version = 52 - -[VsGlsl] -#version 430 -#extension GL_AMD_gpu_shader_int16 : require - -layout(location = 0) in vec4 _4; -layout(location = 1) in i16vec2 _6; - -layout(location = 0) out i16vec2 _8; - -void main() -{ - gl_Position = _4; - _8 = _6; -} - -[VsInfo] -entryPoint = main - -[FsGlsl] -#version 430 -#extension GL_AMD_gpu_shader_int16 : require - -layout(location = 0) out i16vec2 _4; -layout(location = 0) flat in i16vec2 _6; - -void main() -{ - _4 = _6; -} - -[FsInfo] -entryPoint = main - -[GraphicsPipelineState] -colorBuffer[0].format = VK_FORMAT_R16G16_SINT -colorBuffer[0].channelWriteMask = 15 -colorBuffer[0].blendEnable = 0 -colorBuffer[0].blendSrcAlphaToColor = 0 - -[VertexInputState] -binding[0].binding = 0 -binding[0].stride = 32 -binding[0].inputRate = VK_VERTEX_INPUT_RATE_VERTEX -binding[1].binding = 1 -binding[1].stride = 4 -binding[1].inputRate = VK_VERTEX_INPUT_RATE_VERTEX -attribute[0].location = 0 -attribute[0].binding = 0 -attribute[0].format = VK_FORMAT_R32G32B32A32_SFLOAT -attribute[0].offset = 16 -attribute[1].location = 1 -attribute[1].binding = 1 -attribute[1].format = VK_FORMAT_R16G16_SINT -attribute[1].offset = 0 diff --git a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_VertexFetchDouble.pipe b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_VertexFetchDouble.pipe deleted file mode 100644 index 3450258aac..0000000000 --- a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_VertexFetchDouble.pipe +++ /dev/null @@ -1,72 +0,0 @@ -// Tests that a double vertex input is pass in two vgprs - -; BEGIN_SHADERTEST -; RUN: amdllpc -enable-relocatable-shader-elf -o %t.elf %gfxip %s -v | FileCheck -check-prefix=SHADERTEST %s -; SHADERTEST: {{^}}// LLPC pipeline patching results -; Check that the last input parameter is a float2. -; SHADERTEST: define dllexport amdgpu_vs void @_amdgpu_vs_main_fetchless({{.*}}, <2 x float> noundef %0) -; SHADERTEST: {{^//}} LGC glue shader results -; SHADERTEST: {{^;}} ModuleID = 'fetchShader' -; Check that the last element in the return type is a float2. 
-; SHADERTEST: define amdgpu_vs { {{.*}}, <2 x float> } @_amdgpu_vs_main -; SHADERTEST: AMDLLPC SUCCESS -; END_SHADERTEST - -[Version] -version = 52 - -[VsGlsl] -#version 450 - -layout(location = 0) in double _30; - -void main() -{ - if (_30 < 1.0000000000000000818030539140313e-05lf) - { - gl_Position = vec4(-1.0, -1.0, 0.0, 1.0); - } - else - { - gl_Position = vec4(0.0); - } -} - -[VsInfo] -entryPoint = main - -[FsGlsl] -#version 450 - -void main() -{ -} - -[FsInfo] -entryPoint = main - -[ResourceMapping] -userDataNode[0].visibility = 1 -userDataNode[0].type = StreamOutTableVaPtr -userDataNode[0].offsetInDwords = 0 -userDataNode[0].sizeInDwords = 1 -userDataNode[1].visibility = 1 -userDataNode[1].type = IndirectUserDataVaPtr -userDataNode[1].offsetInDwords = 1 -userDataNode[1].sizeInDwords = 1 -userDataNode[1].indirectUserDataCount = 4 - -[GraphicsPipelineState] -colorBuffer[0].format = VK_FORMAT_R8G8B8A8_UNORM -colorBuffer[0].channelWriteMask = 15 -colorBuffer[0].blendEnable = 0 -colorBuffer[0].blendSrcAlphaToColor = 0 - -[VertexInputState] -binding[0].binding = 0 -binding[0].stride = 8 -binding[0].inputRate = VK_VERTEX_INPUT_RATE_VERTEX -attribute[0].location = 0 -attribute[0].binding = 0 -attribute[0].format = VK_FORMAT_R64_SFLOAT -attribute[0].offset = 0 diff --git a/llpc/tool/amdllpc.cpp b/llpc/tool/amdllpc.cpp index b99d7bca89..ee5a2ddee5 100644 --- a/llpc/tool/amdllpc.cpp +++ b/llpc/tool/amdllpc.cpp @@ -43,7 +43,9 @@ #include "llpcShaderCacheWrap.h" #include "llpcThreading.h" #include "llpcUtil.h" +#ifndef LLPC_DISABLE_SPVGEN #include "spvgen.h" +#endif #include "vkgcCapability.h" #include "vkgcExtension.h" #include "lgc/LgcContext.h" @@ -504,10 +506,12 @@ static Result init(int argc, char *argv[], ICompiler *&compiler, ShaderCacheWrap *static_cast *>(opt) = "."; } +#ifndef LLPC_DISABLE_SPVGEN if (!InitSpvGen(nullptr)) { LLPC_ERRS("Failed to initialize SPVGEN\n"); return Result::ErrorUnavailable; } +#endif // Check to see that the ParsedGfxIp is valid std::string gfxIpName = lgc::LgcContext::getGpuNameString(ParsedGfxIp.major, ParsedGfxIp.minor, ParsedGfxIp.stepping); @@ -849,7 +853,9 @@ int main(int argc, char *argv[]) { // Cleanup code that gets run automatically before returning. auto onExit = make_scope_exit([compiler, cache, &result] { +#ifndef LLPC_DISABLE_SPVGEN FinalizeSpvgen(); +#endif if (compiler) compiler->Destroy(); diff --git a/llpc/tool/llpcAutoLayout.cpp b/llpc/tool/llpcAutoLayout.cpp index f079092430..69f238e95f 100644 --- a/llpc/tool/llpcAutoLayout.cpp +++ b/llpc/tool/llpcAutoLayout.cpp @@ -41,7 +41,9 @@ #include "llpcCompilationUtils.h" #include "llpcDebug.h" #include "llpcUtil.h" +#ifndef LLPC_DISABLE_SPVGEN #include "spvgen.h" +#endif #include "vfx.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/Support/Format.h" @@ -260,13 +262,16 @@ void doAutoLayoutDesc(ShaderStage shaderStage, BinaryData spirvBin, GraphicsPipe // Remove the unused variables. void *optBuf = nullptr; unsigned optBufSize = 0; + bool ret = false; +#ifndef LLPC_DISABLE_SPVGEN const char *options[] = {"--remove-unused-interface-variables", "--eliminate-dead-variables"}; - bool ret = spvOptimizeSpirv(spirvBin.codeSize, spirvBin.pCode, sizeof(options) / sizeof(options[0]), options, - &optBufSize, &optBuf, 0, nullptr); + ret = spvOptimizeSpirv(spirvBin.codeSize, spirvBin.pCode, sizeof(options) / sizeof(options[0]), options, &optBufSize, + &optBuf, 0, nullptr); if (ret) { spvBuf = optBuf; spvBufSize = optBufSize; } +#endif // Release optimized spirv data. 
auto freeSpvData = make_scope_exit([&] { diff --git a/llpc/tool/llpcCompilationUtils.cpp b/llpc/tool/llpcCompilationUtils.cpp index d83dc841d2..83386ca47a 100644 --- a/llpc/tool/llpcCompilationUtils.cpp +++ b/llpc/tool/llpcCompilationUtils.cpp @@ -66,7 +66,9 @@ #include "llpcSpirvLowerUtil.h" #include "llpcThreading.h" #include "llpcUtil.h" +#ifndef LLPC_DISABLE_SPVGEN #include "spvgen.h" +#endif #include "vfx.h" #include "vkgcElfReader.h" #include "llvm/ADT/ScopeExit.h" @@ -123,6 +125,7 @@ void cleanupCompileInfo(CompileInfo *compileInfo) { Vfx::vfxCloseDoc(compileInfo->pipelineInfoFile); } +#ifndef LLPC_DISABLE_SPVGEN // ===================================================================================================================== // Translates GLSL source language to corresponding shader stage. // @@ -257,6 +260,7 @@ Expected compileGlsl(const std::string &inFilename, ShaderStage *sta return BinaryData{static_cast(binSize), bin}; } +#endif // ===================================================================================================================== // SPIR-V assembler, converts SPIR-V assembly text file (input) to SPIR-V binary file (output). @@ -264,6 +268,7 @@ Expected compileGlsl(const std::string &inFilename, ShaderStage *sta // @param inFilename : Input filename, SPIR-V assembly text // @returns : BinaryData object of the assembled SPIR-V on success, `ResultError` on failure. Expected assembleSpirv(const std::string &inFilename) { +#ifndef LLPC_DISABLE_SPVGEN if (!InitSpvGen()) return createResultError(Result::ErrorUnavailable, "Failed to load SPVGEN -- cannot assemble SPIR-V assembler source"); @@ -301,6 +306,10 @@ Expected assembleSpirv(const std::string &inFilename) { LLPC_OUTS("\n\n"); return BinaryData{static_cast(binSize), bin}; +#else + return createResultError(Result::ErrorUnavailable, + "SPVGEN isn't available -- cannot assemble SPIR-V assembler source"); +#endif } // ===================================================================================================================== @@ -395,10 +404,10 @@ Error processInputPipeline(ICompiler *compiler, CompileInfo &compileInfo, const compileInfo.gfxPipelineInfo.cbState.target[target].format = VK_FORMAT_R8G8B8A8_SRGB; } } - +#ifndef LLPC_DISABLE_SPVGEN if (EnableOuts() && !InitSpvGen()) LLPC_OUTS("Failed to load SPVGEN -- cannot disassemble and validate SPIR-V\n"); - +#endif for (unsigned stage = 0; stage < pipelineState->numStages; ++stage) { if (pipelineState->stages[stage].dataSize > 0) { StandaloneCompiler::ShaderModuleData shaderModuleData = {}; @@ -408,9 +417,11 @@ Error processInputPipeline(ICompiler *compiler, CompileInfo &compileInfo, const compileInfo.shaderModuleDatas.push_back(shaderModuleData); compileInfo.stageMask |= shaderStageToMask(pipelineState->stages[stage].stage); +#ifndef LLPC_DISABLE_SPVGEN if (EnableOuts()) disassembleSpirv(pipelineState->stages[stage].dataSize, shaderModuleData.spirvBin.pCode, Twine(getShaderStageName(pipelineState->stages[stage].stage)) + " shader module"); +#endif } } @@ -465,6 +476,7 @@ static Expected processInputSpirvStage(const InputSpec &spirvI spvBin = *spvBinOrErr; } +#ifndef LLPC_DISABLE_SPVGEN const bool isSpvGenLoaded = InitSpvGen(); if (!isSpvGenLoaded) { LLPC_OUTS("Failed to load SPVGEN -- no SPIR-V disassembler available\n"); @@ -482,6 +494,7 @@ static Expected processInputSpirvStage(const InputSpec &spirvI return createResultError(Result::ErrorInvalidShader, Twine("Failed to validate SPIR-V:\n") + log); } } +#endif // NOTE: If the entry target is not 
specified, we set it to the one gotten from SPIR-V binary. std::string entryPoint = spirvInput.entryPoint; @@ -590,8 +603,10 @@ static Expected processInputStage(const InputSpec &inputSpec, if (isLlvmIrFile(inFile)) return processInputLlvmIrStage(inputSpec); +#ifndef LLPC_DISABLE_SPVGEN if (isGlslShaderTextFile(inFile)) return processInputGlslStage(inputSpec); +#endif return createResultError(Result::ErrorInvalidShader, Twine("File ") + inFile + diff --git a/llpc/translator/include/LLVMSPIRVLib.h b/llpc/translator/include/LLVMSPIRVLib.h index fc85bf8370..a2d0d39774 100644 --- a/llpc/translator/include/LLVMSPIRVLib.h +++ b/llpc/translator/include/LLVMSPIRVLib.h @@ -95,7 +95,8 @@ bool writeSpirv(llvm::Module *M, llvm::raw_ostream &OS, std::string &ErrMsg); bool readSpirv(lgc::Builder *Builder, const Vkgc::ShaderModuleUsage *ModuleData, const Vkgc::PipelineShaderOptions *ShaderOptions, std::istream &IS, spv::ExecutionModel EntryExecModel, const char *EntryName, const SPIRV::SPIRVSpecConstMap &SpecConstMap, - llvm::ArrayRef ConvertingSamplers, llvm::Module *M, std::string &ErrMsg); + llvm::ArrayRef ConvertingSamplers, llvm::StringRef globalVarPrefix, + llvm::Module *M, std::string &ErrMsg); /// \brief Regularize LLVM module by removing entities not representable by /// SPIRV. diff --git a/llpc/translator/lib/SPIRV/SPIRVInternal.h b/llpc/translator/lib/SPIRV/SPIRVInternal.h index a18f887b5b..d06c530829 100644 --- a/llpc/translator/lib/SPIRV/SPIRVInternal.h +++ b/llpc/translator/lib/SPIRV/SPIRVInternal.h @@ -227,6 +227,7 @@ const static char NonUniform[] = "spirv.NonUniform"; const static char AtomicCounter[] = "spirv.AtomicCounter"; const static char Lds[] = "spirv.Lds"; const static char ContStackStoreType[] = "spirv.ContStackStoreType"; +const static char MaximallyReconverges[] = "spirv.MaximallyReconverges"; } // namespace gSPIRVMD namespace gSPIRVName { @@ -497,6 +498,13 @@ union ShaderFloatControlFlags { unsigned U32All; }; +/// Defaults used for floating-point fast math (corresponds to FPFastMathModeMask) +struct ShaderFloatFastMathDefault { + unsigned Fp16; // FP16 fast math default + unsigned Fp32; // FP32 fast math default + unsigned Fp64; // FP64 fast math default +}; + } // namespace SPIRV #endif diff --git a/llpc/translator/lib/SPIRV/SPIRVReader.cpp b/llpc/translator/lib/SPIRV/SPIRVReader.cpp index 75996c6b00..409ebb52a0 100644 --- a/llpc/translator/lib/SPIRV/SPIRVReader.cpp +++ b/llpc/translator/lib/SPIRV/SPIRVReader.cpp @@ -270,6 +270,12 @@ SPIRVToLLVM::SPIRVToLLVM(Module *llvmModule, SPIRVModule *theSpirvModule, const m_spirvOpMetaKindId = m_context->getMDKindID(MetaNameSpirvOp); m_scratchBoundsChecksEnabled = scratchBoundsChecksEnabled(); + // We initialize FP fast math defaults to SPIRVWORD_MAX rather than zero. This is because zero means there is not FP + // fast math flags (corresponds to FPFastMathModeMaskNone). 
+ m_fpFastMathDefault.Fp16 = SPIRVWORD_MAX; + m_fpFastMathDefault.Fp32 = SPIRVWORD_MAX; + m_fpFastMathDefault.Fp64 = SPIRVWORD_MAX; + m_workaroundStorageImageFormats = shaderOptions->workaroundStorageImageFormats; if (SpirvOverrideWorkaroundStorageImageFormats.getNumOccurrences() > 0) m_workaroundStorageImageFormats = SpirvOverrideWorkaroundStorageImageFormats.getValue(); @@ -1374,6 +1380,79 @@ FastMathFlags SPIRVToLLVM::getFastMathFlags(SPIRVValue *bv) { FastMathFlags fmf; + SPIRVWord fastMathMode = SPIRVWORD_MAX; + if (bv->hasDecorate(DecorationFPFastMathMode, 0, &fastMathMode)) { + // If we find the decoration FPFastMathMode for this value, use it and ignore the defaults. + } else { + // Try to use fast math defaults if it is specified for this type. + switch (ty->getFloatBitWidth()) { + case 16: + if (m_fpFastMathDefault.Fp16 != SPIRVWORD_MAX) + fastMathMode = m_fpFastMathDefault.Fp16; + break; + case 32: + if (m_fpFastMathDefault.Fp32 != SPIRVWORD_MAX) + fastMathMode = m_fpFastMathDefault.Fp32; + break; + case 64: + if (m_fpFastMathDefault.Fp64 != SPIRVWORD_MAX) + fastMathMode = m_fpFastMathDefault.Fp64; + break; + default: + llvm_unreachable("Unexpected bit width!"); + break; + } + } + + // Once we find FP fast math modes are specified explicitly, we respect them. + if (fastMathMode != SPIRVWORD_MAX) { + // Fast flag is deprecated by the extension SPV_KHR_float_controls2, must not be used. + assert((fastMathMode & FPFastMathModeFastMask) == 0); + + if (fastMathMode & FPFastMathModeNotNaNMask) + fmf.setNoNaNs(); + + if (fastMathMode & FPFastMathModeNotInfMask) + fmf.setNoInfs(); + + if (fastMathMode & FPFastMathModeNSZMask) + fmf.setNoSignedZeros(); + + if (fastMathMode & FPFastMathModeAllowRecipMask) + fmf.setAllowReciprocal(); + + if (fastMathMode & FPFastMathModeAllowContractMask) + fmf.setAllowContract(); + + if (fastMathMode & FPFastMathModeAllowReassocMask) + fmf.setAllowReassoc(); + + if (fastMathMode & FPFastMathModeAllowTransformMask) { + // NOTE: AllowTransform is a superset of AllowContract and AllowReassoc. The flags AllowContract and AllowReassoc + // must be set as well according to the spec. AllowTransform allows a floating-point operation and any + // operation(s) producing its operands to be transformed according to real-number rules so we treat it as + // combination of AllowRecip, AllowContract and AllowReassoc. + assert(fastMathMode & FPFastMathModeAllowContractMask); + assert(fastMathMode & FPFastMathModeAllowReassocMask); + fmf.setAllowReciprocal(); + fmf.setAllowContract(); + fmf.setAllowReassoc(); + } + + if (!fmf.noNaNs() || !fmf.noInfs() || !fmf.noSignedZeros()) { + // NOTE: Disallow reassociation if any flag of NotNaN, NotInf, or NSZ is missing. This is because + // reassociation can lead to unexpected results. Consider this: + // + // X - X * 0.0 + // + // If we apply reassociation X * (1.0 - 0.0) = X, the result becomes the value of X. However, if X is INF, NaN + // is expected by performing INF - INF * 0.0. + fmf.setAllowReassoc(false); + } + + return fmf; + } + fmf.setAllowReciprocal(); if (!ty->isTypeFloat(64)) { // Only do this for half and float, not double, to avoid problems with Vulkan CTS precision_double tests. 
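For reference, a minimal standalone sketch of the mask-to-flags mapping used in the hunk above (the free-standing helper and the chosen headers are illustrative; the real logic lives in SPIRVToLLVM::getFastMathFlags and also consults the per-type FPFastMathDefault values):

#include "llvm/IR/Operator.h" // llvm::FastMathFlags
#include "spirv.hpp"          // spv::FPFastMathMode* masks

// Illustrative helper, not part of the patch: map a SPIR-V FPFastMathMode mask to LLVM
// fast-math flags following the rules above. AllowTransform is treated as
// AllowRecip + AllowContract + AllowReassoc, and reassociation is dropped again unless
// NotNaN, NotInf and NSZ are all set, since x - x * 0.0 must still produce NaN when x is Inf.
static llvm::FastMathFlags fastMathFlagsFromSpirvMask(unsigned fastMathMode) {
  llvm::FastMathFlags fmf;
  if (fastMathMode & spv::FPFastMathModeNotNaNMask)
    fmf.setNoNaNs();
  if (fastMathMode & spv::FPFastMathModeNotInfMask)
    fmf.setNoInfs();
  if (fastMathMode & spv::FPFastMathModeNSZMask)
    fmf.setNoSignedZeros();
  if (fastMathMode & (spv::FPFastMathModeAllowRecipMask | spv::FPFastMathModeAllowTransformMask))
    fmf.setAllowReciprocal();
  if (fastMathMode & (spv::FPFastMathModeAllowContractMask | spv::FPFastMathModeAllowTransformMask))
    fmf.setAllowContract();
  if (fastMathMode & (spv::FPFastMathModeAllowReassocMask | spv::FPFastMathModeAllowTransformMask))
    fmf.setAllowReassoc();
  // Reassociation is only safe once NaN, Inf and signed zero are excluded.
  if (!fmf.noNaNs() || !fmf.noInfs() || !fmf.noSignedZeros())
    fmf.setAllowReassoc(false);
  return fmf;
}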
@@ -1892,8 +1971,9 @@ Value *SPIRVToLLVM::addLoadInstRecursively(SPIRVType *const spvType, Value *load Constant *const zero = getBuilder()->getInt32(0); if (loadType->isStructTy() && !spvType->isTypeSampledImage() && !spvType->isTypeImage() && - !spvType->isTypeSampler() && spvType->getOpCode() != OpTypeRayQueryKHR) { - // Rewrite this condition to keep consistent with the assert on getStructMemberCount later + !spvType->isTypeSampler() && spvType->getOpCode() != OpTypeRayQueryKHR + // Rewrite this condition to keep consistent with the assert on getStructMemberCount later + ) { // For structs we lookup the mapping of the elements and use it to reverse map the values. const bool needsPad = isTypeWithPad(loadType); @@ -3686,7 +3766,7 @@ template <> Value *SPIRVToLLVM::transValueWithOpcode(SPIRVValue *con BasicBlock *const block = getBuilder()->GetInsertBlock(); Function *const func = getBuilder()->GetInsertBlock()->getParent(); Value *const predicate = transValue(spvOperands[1], func, block); - return getBuilder()->CreateSubgroupAny(predicate); + return getBuilder()->create(predicate); } // ===================================================================================================================== @@ -3774,7 +3854,7 @@ template <> Value *SPIRVToLLVM::transValueWithOpcode(SPIRVValue *co // // @param spvValue : A SPIR-V value. template <> Value *SPIRVToLLVM::transValueWithOpcode(SPIRVValue *const spvValue) { - return getBuilder()->CreateSubgroupElect(); + return getBuilder()->create(); } // ===================================================================================================================== @@ -3802,7 +3882,7 @@ template <> Value *SPIRVToLLVM::transValueWithOpcode(SPIRV BasicBlock *const block = getBuilder()->GetInsertBlock(); Function *const func = getBuilder()->GetInsertBlock()->getParent(); Value *const predicate = transValue(spvOperands[1], func, block); - return getBuilder()->CreateSubgroupAny(predicate); + return getBuilder()->create(predicate); } // ===================================================================================================================== @@ -3819,6 +3899,24 @@ template <> Value *SPIRVToLLVM::transValueWithOpcode( return getBuilder()->CreateSubgroupAllEqual(value); } +// ===================================================================================================================== +// Handle OpGroupNonUniformRotateKHR. +// +// @param spvValue : A SPIR-V value. +template <> Value *SPIRVToLLVM::transValueWithOpcode(SPIRVValue *const spvValue) { + SPIRVInstruction *const spvInst = static_cast(spvValue); + std::vector spvOperands = spvInst->getOperands(); + assert(static_cast(spvOperands[0])->getZExtIntValue() == ScopeSubgroup); + + BasicBlock *const block = getBuilder()->GetInsertBlock(); + Function *const func = getBuilder()->GetInsertBlock()->getParent(); + Value *const value = transValue(spvOperands[1], func, block); + Value *const delta = transValue(spvOperands[2], func, block); + Value *const clusterSize = + spvOperands.size() > 3 ? transValue(spvOperands[3], func, block) : PoisonValue::get(Type::getInt32Ty(*m_context)); + return getBuilder()->CreateSubgroupRotate(value, delta, clusterSize); +} + // ===================================================================================================================== // Handle OpGroupNonUniformBroadcast. 
// @@ -4005,6 +4103,32 @@ template <> Value *SPIRVToLLVM::transValueWithOpcodeCreateSubgroupShuffleDown(value, delta); } +// ===================================================================================================================== +// Handle OpGroupNonUniformQuadAllKHR. +// +// @param spvValue : A SPIR-V value. +template <> Value *SPIRVToLLVM::transValueWithOpcode(SPIRVValue *const spvValue) { + SPIRVInstruction *const spvInst = static_cast(spvValue); + std::vector spvOperands = spvInst->getOperands(); + BasicBlock *const block = getBuilder()->GetInsertBlock(); + Function *const func = getBuilder()->GetInsertBlock()->getParent(); + Value *const predicate = transValue(spvOperands[0], func, block); + return getBuilder()->CreateQuadAll(predicate, m_requireFullQuads); +} + +// ===================================================================================================================== +// Handle OpGroupNonUniformQuadAnyKHR. +// +// @param spvValue : A SPIR-V value. +template <> Value *SPIRVToLLVM::transValueWithOpcode(SPIRVValue *const spvValue) { + SPIRVInstruction *const spvInst = static_cast(spvValue); + std::vector spvOperands = spvInst->getOperands(); + BasicBlock *const block = getBuilder()->GetInsertBlock(); + Function *const func = getBuilder()->GetInsertBlock()->getParent(); + Value *const predicate = transValue(spvOperands[0], func, block); + return getBuilder()->CreateQuadAny(predicate, m_requireFullQuads); +} + // ===================================================================================================================== // Create TraceRay dialect Op. // @@ -4408,7 +4532,7 @@ template <> Value *SPIRVToLLVM::transValueWithOpcode(SPIRVValu BasicBlock *const block = getBuilder()->GetInsertBlock(); Function *const func = getBuilder()->GetInsertBlock()->getParent(); Value *const predicate = transValue(spvOperands[0], func, block); - return getBuilder()->CreateSubgroupAny(predicate); + return getBuilder()->create(predicate); } // ===================================================================================================================== @@ -4887,6 +5011,14 @@ Value *SPIRVToLLVM::transVariable(SPIRVValue *const spvValue) { storageClass == StorageClassIncomingRayPayloadKHR) globalVar->setAlignment(MaybeAlign(4)); + if (!m_globalVarPrefix.empty()) { + if (globalVar->hasName()) { + globalVar->setName(Twine(m_globalVarPrefix) + globalVar->getName()); + } else { + globalVar->setName(Twine(m_globalVarPrefix) + "spv" + Twine(spvVar->getId())); + } + } + SPIRVBuiltinVariableKind builtinKind; if (spvVar->isBuiltin(&builtinKind)) m_builtinGvMap[globalVar] = builtinKind; @@ -5625,6 +5757,20 @@ SmallVector SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *bv, Fu return mapValue(bv, result); } + case OpAssumeTrueKHR: { + SPIRVInstTemplateBase *assume = static_cast(bv); + Value *cond = transValue(assume->getOperand(0), f, bb); + return mapValue(bv, getBuilder()->CreateIntrinsic(Intrinsic::assume, {}, {cond})); + } + + case OpExpectKHR: { + SPIRVInstTemplateBase *expect = static_cast(bv); + Value *val0 = transValue(expect->getOperand(0), f, bb); + Value *val1 = transValue(expect->getOperand(1), f, bb); + auto instType = transType(expect->getType()); + return mapValue(bv, getBuilder()->CreateIntrinsic(Intrinsic::expect, instType, {val0, val1})); + } + case OpLine: case OpSelectionMerge: return {}; @@ -6450,6 +6596,12 @@ SmallVector SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *bv, Fu return mapValue(bv, transValueWithOpcode(bv)); case OpSubgroupAllEqualKHR: 
return mapValue(bv, transValueWithOpcode(bv)); + case OpGroupNonUniformRotateKHR: + return mapValue(bv, transValueWithOpcode(bv)); + case OpGroupNonUniformQuadAllKHR: + return mapValue(bv, transValueWithOpcode(bv)); + case OpGroupNonUniformQuadAnyKHR: + return mapValue(bv, transValueWithOpcode(bv)); case OpSubgroupReadInvocationKHR: return mapValue(bv, transValueWithOpcode(bv)); case OpGroupIAddNonUniformAMD: @@ -6592,6 +6744,10 @@ Function *SPIRVToLLVM::transFunction(SPIRVFunction *bf) { execModelMDs.push_back(ConstantAsMetadata::get(ConstantInt::get(int32Ty, execModel))); auto execModelMdNode = MDNode::get(*m_context, execModelMDs); f->addMetadata(gSPIRVMD::ExecutionModel, *execModelMdNode); + if (m_maximallyReconverges) { + auto trueValue = ConstantAsMetadata::get(ConstantInt::getTrue(*m_context)); + f->addMetadata(gSPIRVMD::MaximallyReconverges, *MDNode::get(*m_context, {trueValue})); + } } f->setCallingConv(CallingConv::SPIR_FUNC); @@ -7979,9 +8135,20 @@ bool SPIRVToLLVM::translate(ExecutionModel entryExecModel, const char *entryName if (auto em = m_entryTarget->getExecutionMode(ExecutionModeRoundingModeRTZ)) m_fpControlFlags.RoundingModeRTZ = em->getLiterals()[0] >> 3; + if (auto em = m_entryTarget->getExecutionMode(ExecutionModeFPFastMathDefault)) { + assert(em->getLiterals().size() == 3); // 3 words to hold FP16/FP32/FP64 defaults + m_fpFastMathDefault.Fp16 = em->getLiterals()[0]; + m_fpFastMathDefault.Fp32 = em->getLiterals()[1]; + m_fpFastMathDefault.Fp64 = em->getLiterals()[2]; + } + if (m_execModule >= ExecutionModelVertex && m_execModule <= ExecutionModelGeometry) hasXfbOuts = m_entryTarget->getExecutionMode(ExecutionModeXfb) != nullptr; + if (m_execModule == ExecutionModelFragment) + m_requireFullQuads = m_entryTarget->getExecutionMode(ExecutionModeRequireFullQuadsKHR) != nullptr; + + m_maximallyReconverges = m_entryTarget->getExecutionMode(ExecutionModeMaximallyReconvergesKHR) != nullptr; } else { createLibraryEntryFunc(); } @@ -8426,6 +8593,10 @@ bool SPIRVToLLVM::transMetadata() { computeMode.derivatives = DerivativeMode::Linear; else computeMode.derivatives = DerivativeMode::None; + + if (bf->getExecutionMode(ExecutionModeQuadDerivativesKHR)) + computeMode.derivatives = DerivativeMode::Quads; + unsigned overrideShaderGroupSizeX = m_shaderOptions->overrideShaderThreadGroupSizeX; unsigned overrideShaderGroupSizeY = m_shaderOptions->overrideShaderThreadGroupSizeY; unsigned overrideShaderGroupSizeZ = m_shaderOptions->overrideShaderThreadGroupSizeZ; @@ -10648,8 +10819,8 @@ Value *SPIRVToLLVM::transCooperativeMatrixKHRFromConstruct(SPIRVType *spvCoopMat bool llvm::readSpirv(Builder *builder, const ShaderModuleUsage *shaderInfo, const PipelineShaderOptions *shaderOptions, std::istream &is, spv::ExecutionModel entryExecModel, const char *entryName, - const SPIRVSpecConstMap &specConstMap, ArrayRef convertingSamplers, Module *m, - std::string &errMsg) { + const SPIRVSpecConstMap &specConstMap, ArrayRef convertingSamplers, + StringRef globalVarPrefix, Module *m, std::string &errMsg) { assert(entryExecModel != ExecutionModelKernel && "Not support ExecutionModelKernel"); std::unique_ptr bm(SPIRVModule::createSPIRVModule()); @@ -10657,6 +10828,7 @@ bool llvm::readSpirv(Builder *builder, const ShaderModuleUsage *shaderInfo, cons is >> *bm; SPIRVToLLVM btl(m, bm.get(), specConstMap, convertingSamplers, builder, shaderInfo, shaderOptions); + btl.setGlobalVarPrefix(globalVarPrefix); bool succeed = true; if (!btl.translate(entryExecModel, entryName)) { bm->getError(errMsg); diff --git 
a/llpc/translator/lib/SPIRV/SPIRVReader.h b/llpc/translator/lib/SPIRV/SPIRVReader.h index 7f1699f265..4e8ea3d63c 100644 --- a/llpc/translator/lib/SPIRV/SPIRVReader.h +++ b/llpc/translator/lib/SPIRV/SPIRVReader.h @@ -79,6 +79,8 @@ class SPIRVToLLVM { llvm::ArrayRef convertingSamplers, lgc::Builder *builder, const Vkgc::ShaderModuleUsage *moduleUsage, const Vkgc::PipelineShaderOptions *shaderOptions); + void setGlobalVarPrefix(llvm::StringRef globalVarPrefix) { m_globalVarPrefix = globalVarPrefix; } + DebugLoc getDebugLoc(SPIRVInstruction *bi, Function *f); void updateDebugLoc(SPIRVValue *bv, Function *f); @@ -249,8 +251,10 @@ class SPIRVToLLVM { LLVMContext *m_context; lgc::Builder *m_builder; SPIRVModule *m_bm; + std::string m_globalVarPrefix; bool m_enableGatherLodNz; ShaderFloatControlFlags m_fpControlFlags; + ShaderFloatFastMathDefault m_fpFastMathDefault; SPIRVFunction *m_entryTarget; const SPIRVSpecConstMap &m_specConstMap; llvm::ArrayRef m_convertingSamplers; @@ -282,6 +286,10 @@ class SPIRVToLLVM { unsigned m_execModule; bool m_scratchBoundsChecksEnabled; + bool m_requireFullQuads; + + bool m_maximallyReconverges = false; + enum class LlvmMemOpType : uint8_t { IS_LOAD, IS_STORE }; struct ScratchBoundsCheckData { LlvmMemOpType memOpType; diff --git a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVEntry.cpp b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVEntry.cpp index 778db418d0..f1130badd8 100644 --- a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVEntry.cpp +++ b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVEntry.cpp @@ -409,6 +409,9 @@ void SPIRVExecutionModeId::decode(std::istream &I) { case ExecutionModeLocalSizeId: Operands.resize(3); break; + case ExecutionModeFPFastMathDefault: + Operands.resize(2); + break; default: // Do nothing. Keep this to avoid VS2013 warning. 
break; @@ -438,6 +441,10 @@ void SPIRVName::validate() const { assert(WordCount == getSizeInWords(Str) + 2 && "Incorrect word count"); } +void SPIRVString::updateString(const char *str, unsigned len) { + Str = std::string(str, len); +} + _SPIRV_IMP_ENCDEC2(SPIRVString, Id, Str) _SPIRV_IMP_DECODE3(SPIRVMemberName, Target, MemberNumber, Str) diff --git a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVEntry.h b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVEntry.h index d51f519c77..0fedfc2e62 100644 --- a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVEntry.h +++ b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVEntry.h @@ -432,8 +432,10 @@ class SPIRVString : public SPIRVEntry { SPIRVString(SPIRVModule *M, SPIRVId TheId, const std::string &TheStr) : SPIRVEntry(M, FixedWC + getSizeInWords(TheStr), OC, TheId), Str(TheStr) {} SPIRVString() : SPIRVEntry(OC) {} + SPIRVString(Op OpCode) : SPIRVEntry(OpCode) {} _SPIRV_DCL_DECODE const std::string &getStr() const { return Str; } + void updateString(const char *str, unsigned len); protected: std::string Str; @@ -484,7 +486,7 @@ typedef SPIRVEntryOpCodeOnly SPIRVNoLine; class SPIRVExecutionMode : public SPIRVAnnotation { public: - // Complete constructor for LocalSize + // Complete constructor for LocalSize and FPFastMathDefault SPIRVExecutionMode(SPIRVEntry *TheTarget, SPIRVExecutionModeKind TheExecMode, SPIRVWord Word0, SPIRVWord Word1, SPIRVWord Word2) : SPIRVAnnotation(TheTarget, 6), ExecMode(TheExecMode) { diff --git a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVEnum.h b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVEnum.h index 3fb4f407e8..2c54985436 100644 --- a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVEnum.h +++ b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVEnum.h @@ -213,6 +213,7 @@ template <> inline void SPIRVMap::init() { ADD_VEC_INIT(CapabilityCooperativeMatrixKHR, {CapabilityShader}); ADD_VEC_INIT(CapabilityComputeDerivativeGroupLinearNV, {CapabilityShader}); ADD_VEC_INIT(CapabilityComputeDerivativeGroupQuadsNV, {CapabilityShader}); + ADD_VEC_INIT(CapabilityQuadControlKHR, {CapabilityShader}); } template <> inline void SPIRVMap::init() { @@ -270,6 +271,8 @@ template <> inline void SPIRVMap::init() { ADD_VEC_INIT(ExecutionModeStencilRefUnchangedBackAMD, {CapabilityStencilExportEXT}); ADD_VEC_INIT(ExecutionModeStencilRefGreaterBackAMD, {CapabilityStencilExportEXT}); ADD_VEC_INIT(ExecutionModeStencilRefLessBackAMD, {CapabilityStencilExportEXT}); + ADD_VEC_INIT(ExecutionModeRequireFullQuadsKHR, {CapabilityQuadControlKHR}); + ADD_VEC_INIT(ExecutionModeQuadDerivativesKHR, {CapabilityQuadControlKHR}); } template <> inline void SPIRVMap::init() { diff --git a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVInstruction.h b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVInstruction.h index 2f2780b527..f250f8b3dd 100644 --- a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVInstruction.h +++ b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVInstruction.h @@ -2141,6 +2141,7 @@ _SPIRV_OP(GroupNonUniformElect, true, 4) _SPIRV_OP(GroupNonUniformAll, true, 5) _SPIRV_OP(GroupNonUniformAny, true, 5) _SPIRV_OP(GroupNonUniformAllEqual, true, 5) +_SPIRV_OP(GroupNonUniformRotateKHR, true, 6) _SPIRV_OP(GroupNonUniformBroadcast, true, 6) _SPIRV_OP(GroupNonUniformBroadcastFirst, true, 5) _SPIRV_OP(GroupNonUniformBallot, true, 5) @@ -2181,6 +2182,16 @@ _SPIRV_OP(GroupUMaxNonUniformAMD, true, 6, true, 1) _SPIRV_OP(GroupSMaxNonUniformAMD, true, 6, true, 1) #undef _SPIRV_OP +class SPIRVQuadControlInstBase : public SPIRVInstTemplateBase { +public: + SPIRVCapVec getRequiredCapability() const override { return 
getVec(CapabilityQuadControlKHR); } +}; +#define _SPIRV_OP(x, ...) typedef SPIRVInstTemplate SPIRV##x; +// Group instructions +_SPIRV_OP(GroupNonUniformQuadAllKHR, true, 4) +_SPIRV_OP(GroupNonUniformQuadAnyKHR, true, 4) +#undef _SPIRV_OP + class SPIRVAtomicInstBase : public SPIRVInstTemplateBase { public: SPIRVCapVec getRequiredCapability() const override { @@ -2705,6 +2716,31 @@ class SPIRVCooperativeMatrixKHRInstBase : public SPIRVInstTemplateBase { _SPIRV_OP(CooperativeMatrixMulAddKHR, true, 6, true, 3) #undef _SPIRV_OP +class SPIRVExpectAssumeInstBase : public SPIRVInstTemplateBase { +public: + SPIRVCapVec getRequiredCapability() const override { return {CapabilityExpectAssumeKHR}; } + +protected: + void validate() const override { + SPIRVInstruction::validate(); + if (OpAssumeTrueKHR == OpCode) { + auto type = getValueType(Ops[0]); + assert(type->isTypeBool() && type->isTypeScalar()); + (void(type)); // unused + } else { + assert(OpExpectKHR == OpCode); + auto type = getValueType(Ops[0]); + assert(type->isTypeVectorOrScalarInt() || type->isTypeVectorOrScalarBool()); + (void(type)); // unused + } + } +}; + +#define _SPIRV_OP(x, ...) typedef SPIRVInstTemplate SPIRV##x; +_SPIRV_OP(AssumeTrueKHR, false, 2, false) +_SPIRV_OP(ExpectKHR, true, 5, false) +#undef _SPIRV_OP + } // namespace SPIRV #endif diff --git a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVIsValidEnum.h b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVIsValidEnum.h index 41242df7cc..bf92863f6b 100644 --- a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVIsValidEnum.h +++ b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVIsValidEnum.h @@ -162,6 +162,9 @@ inline bool isValid(spv::ExecutionMode V) { case ExecutionModeStencilRefUnchangedBackAMD: case ExecutionModeStencilRefGreaterBackAMD: case ExecutionModeStencilRefLessBackAMD: + case ExecutionModeQuadDerivativesKHR: + case ExecutionModeRequireFullQuadsKHR: + case ExecutionModeFPFastMathDefault: return true; default: return false; @@ -562,6 +565,10 @@ inline bool isValid(spv::Capability V) { case CapabilityWorkgroupMemoryExplicitLayout16BitAccessKHR: case CapabilityComputeDerivativeGroupLinearNV: case CapabilityComputeDerivativeGroupQuadsNV: + case CapabilityExpectAssumeKHR: + case CapabilityGroupNonUniformRotateKHR: + case CapabilityQuadControlKHR: + case CapabilityFloatControls2: return true; default: return false; @@ -821,6 +828,7 @@ inline bool isValid(spv::Op V) { case OpGroupNonUniformAll: case OpGroupNonUniformAny: case OpGroupNonUniformAllEqual: + case OpGroupNonUniformRotateKHR: case OpGroupNonUniformBroadcast: case OpGroupNonUniformBroadcastFirst: case OpGroupNonUniformBallot: @@ -851,6 +859,8 @@ inline bool isValid(spv::Op V) { case OpGroupNonUniformLogicalXor: case OpGroupNonUniformQuadBroadcast: case OpGroupNonUniformQuadSwap: + case OpGroupNonUniformQuadAllKHR: + case OpGroupNonUniformQuadAnyKHR: case OpCopyLogical: case OpPtrEqual: case OpPtrNotEqual: @@ -927,6 +937,8 @@ inline bool isValid(spv::Op V) { case OpRayQueryGetIntersectionObjectToWorldKHR: case OpRayQueryGetIntersectionWorldToObjectKHR: case OpRayQueryGetIntersectionTriangleVertexPositionsKHR: + case OpExpectKHR: + case OpAssumeTrueKHR: case OpTypeCooperativeMatrixKHR: case OpCooperativeMatrixLoadKHR: case OpCooperativeMatrixStoreKHR: @@ -975,6 +987,9 @@ inline bool isValidFPFastMathModeMask(SPIRVWord Mask) { ValidMask |= FPFastMathModeNSZMask; ValidMask |= FPFastMathModeAllowRecipMask; ValidMask |= FPFastMathModeFastMask; + ValidMask |= FPFastMathModeAllowContractMask; + ValidMask |= FPFastMathModeAllowReassocMask; + ValidMask 
|= FPFastMathModeAllowTransformMask; return (Mask & ~ValidMask) == 0; } diff --git a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVModule.cpp b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVModule.cpp index 2531347b91..279d1b92af 100644 --- a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVModule.cpp +++ b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVModule.cpp @@ -368,6 +368,54 @@ void SPIRVModuleImpl::postProcessExecutionModeId() { execMode = add(new SPIRVExecutionMode(getEntry(tid), ExecutionModeLocalSizeId, ops[0], ops[1], ops[2])); break; } + case ExecutionModeFPFastMathDefault: { + assert(ops.size() == 2); // Must have 2 words + auto targetType = static_cast(getEntry(ops[0])); + assert(targetType && targetType->isTypeFloat()); // Must be a scalar floating-point type + const SPIRVWord fastMathMode = static_cast(getEntry(ops[1]))->getZExtIntValue(); + + // Try to find if we have already added this execution mode because FPFastMathDefault can appear for multiple + // times in SPIR-V binary to specify different fast math defaults for different FP types. + auto fpFastMathDefault = + static_cast(getEntry(tid))->getExecutionMode(ExecutionModeFPFastMathDefault); + if (fpFastMathDefault) { + switch (targetType->getBitWidth()) { + case 16: + fpFastMathDefault->updateLiteral(0, fastMathMode); + break; + case 32: + fpFastMathDefault->updateLiteral(1, fastMathMode); + break; + case 64: + fpFastMathDefault->updateLiteral(2, fastMathMode); + break; + default: + llvm_unreachable("Unexpected bit width!"); + break; + } + } else { + // Not found, create a new one with initialized values. + SPIRVWord fpDefault[3] = {SPIRVWORD_MAX, SPIRVWORD_MAX, SPIRVWORD_MAX}; + switch (targetType->getBitWidth()) { + case 16: + fpDefault[0] = fastMathMode; + break; + case 32: + fpDefault[1] = fastMathMode; + break; + case 64: + fpDefault[2] = fastMathMode; + break; + default: + llvm_unreachable("Unexpected bit width!"); + break; + } + fpFastMathDefault = add(new SPIRVExecutionMode(getEntry(tid), ExecutionModeFPFastMathDefault, fpDefault[0], + fpDefault[1], fpDefault[2])); + execMode = fpFastMathDefault; + } + break; + } default: break; } diff --git a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h index 9b17bde3c0..cd04bb2760 100644 --- a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h +++ b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h @@ -138,6 +138,10 @@ template <> inline void SPIRVMap::init() { add(ExecutionModeStencilRefUnchangedBackAMD, "StencilRefUnchangedBackAMD"); add(ExecutionModeStencilRefGreaterBackAMD, "StencilRefGreaterBackAMD"); add(ExecutionModeStencilRefLessBackAMD, "StencilRefLessBackAMD"); + add(ExecutionModeQuadDerivativesKHR, "QuadDerivativesKHR"); + add(ExecutionModeRequireFullQuadsKHR, "RequireFullQuadsKHR"); + add(ExecutionModeFPFastMathDefault, "FPFastMathDefault"); + add(ExecutionModeMaximallyReconvergesKHR, "MaximallyReconvergesKHR"); } SPIRV_DEF_NAMEMAP(ExecutionMode, SPIRVExecutionModeNameMap) @@ -502,6 +506,10 @@ template <> inline void SPIRVMap::init() { add(CapabilityCooperativeMatrixKHR, "CooperativeMatrixKHR"); add(CapabilityComputeDerivativeGroupLinearNV, "ComputeDerivativeGroupLinearNV"); add(CapabilityComputeDerivativeGroupQuadsNV, "ComputeDerivativeGroupQuadsNV"); + add(CapabilityExpectAssumeKHR, "ExpectAssumeKHR"); + add(CapabilityGroupNonUniformRotateKHR, "GroupNonUniformRotateKHR"); + add(CapabilityQuadControlKHR, "QuadControlKHR"); + add(CapabilityFloatControls2, "FloatControls2"); } SPIRV_DEF_NAMEMAP(Capability, 
SPIRVCapabilityNameMap) diff --git a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVOpCodeEnum.h b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVOpCodeEnum.h index 9172c7ad9a..12c649e410 100644 --- a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVOpCodeEnum.h +++ b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVOpCodeEnum.h @@ -287,6 +287,7 @@ _SPIRV_OP(SubgroupFirstInvocationKHR, 4422) _SPIRV_OP(SubgroupAllKHR, 4428) _SPIRV_OP(SubgroupAnyKHR, 4429) _SPIRV_OP(SubgroupAllEqualKHR, 4430) +_SPIRV_OP(GroupNonUniformRotateKHR, 4431) _SPIRV_OP(SubgroupReadInvocationKHR, 4432) _SPIRV_OP(SDotKHR, 4450) _SPIRV_OP(UDotKHR, 4451) @@ -315,6 +316,8 @@ _SPIRV_OP(GroupSMaxNonUniformAMD, 5007) _SPIRV_OP(FragmentMaskFetchAMD, 5011) _SPIRV_OP(FragmentFetchAMD, 5012) _SPIRV_OP(ReadClockKHR, 5056) +_SPIRV_OP(GroupNonUniformQuadAllKHR, 5110) +_SPIRV_OP(GroupNonUniformQuadAnyKHR, 5111) _SPIRV_OP(EmitMeshTasksEXT, 5294) _SPIRV_OP(SetMeshOutputsEXT, 5295) _SPIRV_OP(ReportIntersectionKHR, 5334) @@ -341,6 +344,8 @@ _SPIRV_OP(SubgroupBlockReadINTEL, 5575) _SPIRV_OP(SubgroupBlockWriteINTEL, 5576) _SPIRV_OP(SubgroupImageBlockReadINTEL, 5577) _SPIRV_OP(SubgroupImageBlockWriteINTEL, 5578) +_SPIRV_OP(AssumeTrueKHR, 5630) +_SPIRV_OP(ExpectKHR, 5631) _SPIRV_OP(DecorateStringGOOGLE, 5632) _SPIRV_OP(MemberDecorateStringGOOGLE, 5633) _SPIRV_OP(RayQueryGetRayTMinKHR, 6016) diff --git a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVType.cpp b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVType.cpp index 4669efb22b..9c3cf9634c 100644 --- a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVType.cpp +++ b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVType.cpp @@ -48,9 +48,15 @@ namespace SPIRV { SPIRVType *SPIRVType::getArrayElementType() const { - assert((OpCode == OpTypeArray || OpCode == OpTypeRuntimeArray) && "Not array type"); - return (OpCode == OpTypeArray) ? 
static_cast(this)->getElementType() - : static_cast(this)->getElementType(); + switch (OpCode) { + case OpTypeArray: + return static_cast(this)->getElementType(); + case OpTypeRuntimeArray: + return static_cast(this)->getElementType(); + default: + assert(!"Not array type"); + }; + return nullptr; } uint64_t SPIRVType::getArrayLength() const { diff --git a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVType.h b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVType.h index b5709c1007..13f209037a 100644 --- a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVType.h +++ b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVType.h @@ -358,6 +358,7 @@ class SPIRVTypeRuntimeArray : public SPIRVType { // Incomplete constructor SPIRVTypeRuntimeArray() : SPIRVType(OpTypeRuntimeArray), ElemType(nullptr) {} + SPIRVTypeRuntimeArray(Op OC) : SPIRVType(OC), ElemType(nullptr) {} SPIRVType *getElementType() const { return ElemType; } SPIRVCapVec getRequiredCapability() const override { return getElementType()->getRequiredCapability(); } virtual std::vector getNonLiteralOperands() const override { diff --git a/llpc/util/llpcShaderModuleHelper.cpp b/llpc/util/llpcShaderModuleHelper.cpp index b4ff1027a2..d83ccb1082 100644 --- a/llpc/util/llpcShaderModuleHelper.cpp +++ b/llpc/util/llpcShaderModuleHelper.cpp @@ -220,15 +220,17 @@ ShaderModuleUsage ShaderModuleHelper::getShaderModuleUsageInfo(const BinaryData shaderModuleUsage.enableRayQuery = true; if ((!shaderModuleUsage.useSubgroupSize) && - ((capabilities.count(CapabilityGroupNonUniform) > 0) || (capabilities.count(CapabilityGroupNonUniformVote) > 0) || - (capabilities.count(CapabilityGroupNonUniformArithmetic) > 0) || - (capabilities.count(CapabilityGroupNonUniformBallot) > 0) || - (capabilities.count(CapabilityGroupNonUniformShuffle) > 0) || - (capabilities.count(CapabilityGroupNonUniformShuffleRelative) > 0) || - (capabilities.count(CapabilityGroupNonUniformClustered) > 0) || - (capabilities.count(CapabilityGroupNonUniformQuad) > 0) || - (capabilities.count(CapabilitySubgroupBallotKHR) > 0) || (capabilities.count(CapabilitySubgroupVoteKHR) > 0) || - (capabilities.count(CapabilityGroups) > 0))) { + ((capabilities.count(CapabilityGroupNonUniform) > 0) || + (capabilities.count(CapabilityGroupNonUniformVote) > 0) || + (capabilities.count(CapabilityGroupNonUniformArithmetic) > 0) || + (capabilities.count(CapabilityGroupNonUniformBallot) > 0) || + (capabilities.count(CapabilityGroupNonUniformShuffle) > 0) || + (capabilities.count(CapabilityGroupNonUniformShuffleRelative) > 0) || + (capabilities.count(CapabilityGroupNonUniformClustered) > 0) || + (capabilities.count(CapabilityGroupNonUniformQuad) > 0) || + (capabilities.count(CapabilitySubgroupBallotKHR) > 0) || + (capabilities.count(CapabilitySubgroupVoteKHR) > 0) || (capabilities.count(CapabilityGroups) > 0)) || + (capabilities.count(CapabilityGroupNonUniformRotateKHR) > 0)) { shaderModuleUsage.useSubgroupSize = true; } diff --git a/shared/continuations/CMakeLists.txt b/shared/continuations/CMakeLists.txt index f8b9ee8a9e..a715a91ef3 100644 --- a/shared/continuations/CMakeLists.txt +++ b/shared/continuations/CMakeLists.txt @@ -22,6 +22,7 @@ add_llvm_library(LLVMContinuations lib/DXILContLgcRtOpConverter.cpp lib/DXILContPostProcess.cpp lib/DXILSupport.cpp + lib/GpurtContext.cpp lib/GpurtDialect.cpp lib/LegacyCleanupContinuations.cpp lib/LgcCpsDialect.cpp diff --git a/shared/continuations/include/continuations/GpurtContext.h b/shared/continuations/include/continuations/GpurtContext.h new file mode 100644 index 0000000000..bb5df3a12a --- /dev/null 
+++ b/shared/continuations/include/continuations/GpurtContext.h @@ -0,0 +1,60 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2020-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ +/** + *********************************************************************************************************************** + * @file GpurtContext.h + * @brief LLVMContext extension that stores a GPURT library module + *********************************************************************************************************************** + */ + +#pragma once + +#include + +#include "llvm-dialects/Dialect/ContextExtension.h" + +namespace llvm { +class Module; +} + +namespace lgc { + +// This extension can be attached to an LLVMContext and queried via the +// GpurtContext::get method inherited from the base class. +// +// Compiler drivers (like LLPC) are expected to set theModule to the GPURT +// library, so that certain raytracing-related passes can cross-module inline +// functions implemented there. 
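A short, hypothetical usage sketch of the pattern described in the comment above; the exact get() signature is inherited from llvm_dialects::ContextExtensionImpl and is assumed here to take the LLVMContext, and the caller supplying the module stands in for however a driver such as LLPC obtains the GPURT library:

#include "continuations/GpurtContext.h"
#include "llvm/IR/Module.h"
#include <memory>
#include <utility>

// Hypothetical driver-side setup (assumed API shape): attach the GPURT library module to the
// LLVMContext so later raytracing passes can cross-module inline functions from it.
void attachGpurtLibrary(llvm::LLVMContext &context, std::unique_ptr<llvm::Module> gpurtLibrary) {
  lgc::GpurtContext &gpurtContext = lgc::GpurtContext::get(context);
  gpurtContext.theModule = std::move(gpurtLibrary);
}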
+class GpurtContext : public llvm_dialects::ContextExtensionImpl { +public: + explicit GpurtContext(llvm::LLVMContext &) {} + ~GpurtContext(); + + static Key theKey; + + std::unique_ptr theModule; +}; + +} // namespace lgc diff --git a/shared/continuations/include/lgc/GpurtDialect.td b/shared/continuations/include/lgc/GpurtDialect.td index 4c4f6fb563..9ac6fd4a40 100644 --- a/shared/continuations/include/lgc/GpurtDialect.td +++ b/shared/continuations/include/lgc/GpurtDialect.td @@ -97,7 +97,7 @@ def GpurtLdsStackStoreOp : GpurtOp<"lds.stack.store", [Memory<[(write)]>, WillRe } def GpurtFloatWithRoundModeOp : GpurtOp<"rt.floatop.roundmode", [Memory<[]>, WillReturn]> { - let arguments = (ins I32:$roundMode, I32:$operation, (ScalarOrFixedVector F32):$src0, (eq $src0):$src1); + let arguments = (ins I32:$round_mode, I32:$operation, (ScalarOrFixedVector F32):$src0, (eq $src0):$src1); let results = (outs (eq $src0):$result); let summary = "return result of floatOp with roundmode"; } @@ -126,8 +126,14 @@ def GpurtGetFlattenedGroupThreadIdOp : GpurtOp<"get.flattened.group.thread.id", let summary = "return the flattened group thread ID"; } +def GpurtDispatchThreadIdFlatOp : GpurtOp<"dispatch.thread.id.flat", [Memory<[]>, WillReturn]> { + let arguments = (ins); + let results = (outs I32:$result); + let summary = "return the flat dispatch threadID"; +} + def GpurtSetHitAttributesOp : GpurtOp<"set.hit.attributes", [Memory<[(write InaccessibleMem)]>, WillReturn]> { - let arguments = (ins F32:$tCurrent, I32:$kind, I32:$status, I32:$instNodeAddrLo, I32:$instNodeAddrHi, + let arguments = (ins F32:$t_current, I32:$kind, I32:$status, I32:$inst_node_addr_lo, I32:$inst_node_addr_hi, I32:$primitiveIndex, I32:$anyHitCallType, I32:$geometryIndex); let results = (outs); @@ -135,15 +141,15 @@ def GpurtSetHitAttributesOp : GpurtOp<"set.hit.attributes", [Memory<[(write Inac let description = [{ Compiler notification of hit attributes. - - tCurrent: Current parametric hit distance relative to TMin. + - t_current: Current parametric hit distance relative to TMin. - kind: Intersection hit kind. - status: Hit status. - - instNodeAddrLo: Current instance node address lo bits. + - inst_node_addr_lo: Current instance node address lo bits. - - instNodeAddrHi: Current instance node address hi bits. + - inst_node_addr_hi: Current instance node address hi bits. - anyHitCallType: Indication of calling behavior on any hit shader. @@ -153,111 +159,111 @@ def GpurtSetHitAttributesOp : GpurtOp<"set.hit.attributes", [Memory<[(write Inac def GpurtGetHitAttributesOp : GpurtOp<"get.hit.attributes", [Memory<[(write ArgMem), (read InaccessibleMem)]>, WillReturn]> { - let arguments = (ins PointerType:$tCurrentPtr, PointerType:$kindPtr, PointerType:$statusPtr); + let arguments = (ins PointerType:$t_current_ptr, PointerType:$kind_ptr, PointerType:$status_ptr); let results = (outs); let summary = "Get hit attributes"; let description = [{ Get hit attributes from compiler. - - tCurrentPtr: Pointer to store current parametric hit distance relative to TMin. + - t_current_ptr: Pointer to store current parametric hit distance relative to TMin. - - kindPtr: Pointer to store intersection hit kind. + - kind_ptr: Pointer to store intersection hit kind. - - statusPtr: Pointer to store hit status. + - status_ptr: Pointer to store hit status. 
}]; } def GpurtSetTraceParamsOp : GpurtOp<"set.trace.params", [Memory<[(write InaccessibleMem)]>, WillReturn]> { - let arguments = (ins I32:$rayFlags, I32:$instanceInclusionMask, F32:$originX, F32:$originY, F32:$originZ, - F32:$tMin, F32:$dirX, F32:$dirY, F32:$dirZ); + let arguments = (ins I32:$ray_flags, I32:$instance_inclusion_mask, F32:$origin_x, F32:$origin_y, F32:$origin_z, + F32:$t_min, F32:$dir_x, F32:$dir_y, F32:$dir_z); let results = (outs); let summary = "Set trace parameters"; let description = [{ Compiler notification of trace input parameters. - - rayFlags: Ray flags. + - ray_flags: Ray flags. - - instanceInclusionMask: Instance inclusion mask. + - instance_inclusion_mask: Instance inclusion mask. - - originX: Ray origin X. + - origin_x: Ray origin X. - - originY: Ray origin Y. + - origin_y: Ray origin Y. - - originZ: Ray origin Z. + - origin_z: Ray origin Z. - - tMin: T Min. + - t_min: T Min. - - dirX: World ray direction X. + - dir_x: World ray direction X. - - dirY: World ray direction Y. + - dir_y: World ray direction Y. - - dirZ: World ray direction Z. + - dir_z: World ray direction Z. }]; } def GpurtCallClosestHitShaderOp : GpurtOp<"call.closest.hit.shader", [Memory<[(readwrite InaccessibleMem)]>, WillReturn]> { - let arguments = (ins V2I32:$shaderId, I32:$tableIndex); + let arguments = (ins V2I32:$shader_id, I32:$table_index); let results = (outs I1:$result); let summary = " Call closest hit shader"; let description = [{ Compiler closest hit shader inlining patch function. - - shaderId: Closest hit shader identifier. + - shader_id: Closest hit shader identifier. - - tableIndex: Hit group shader record table index. + - table_index: Hit group shader record table index. Returns true if the call was inlined. }]; } def GpurtCallMissShaderOp : GpurtOp<"call.miss.shader", [Memory<[(readwrite InaccessibleMem)]>, WillReturn]> { - let arguments = (ins V2I32:$shaderId, I32:$tableIndex); + let arguments = (ins V2I32:$shader_id, I32:$table_index); let results = (outs I1:$result); let summary = " Call miss shader"; let description = [{ Compiler miss shader inlining patch function. - - shaderId: Miss shader identifier. + - shader_id: Miss shader identifier. - - tableIndex: Miss shader record table index. + - table_index: Miss shader record table index. Returns true if the call was inlined. }]; } def GpurtCallTriangleAnyHitShaderOp : GpurtOp<"call.triangle.any.hit.shader", [Memory<[(readwrite InaccessibleMem)]>, WillReturn]> { - let arguments = (ins V2I32:$shaderId, I32:$tableIndex, V2F32:$attr); + let arguments = (ins V2I32:$shader_id, I32:$table_index, V2F32:$attr); let results = (outs); let summary = " Call any hit shader"; let description = [{ Compiler any hit shader inlining patch function. - - shaderId: AnyHit shader identifier. + - shader_id: AnyHit shader identifier. - - tableIndex: Hit group shader record index. + - table_index: Hit group shader record index. - attr: Triangle barycentrics. }]; } def GpurtCallIntersectionShaderOp : GpurtOp<"call.intersection.shader", [Memory<[(readwrite InaccessibleMem)]>, WillReturn]> { - let arguments = (ins V2I32:$shaderId, V2I32:$anyHitShaderId, I32:$tableIndex); + let arguments = (ins V2I32:$shader_id, V2I32:$any_hit_shader_id, I32:$table_index); let results = (outs); let summary = " Call intersection shader"; let description = [{ Compiler intersection shader inlining patch function. - - shaderId: Programmable intersection shader identifier. + - shader_id: Programmable intersection shader identifier. 
- - anyHitShaderId: AnyHit shader identifier. + - any_hit_shader_id: AnyHit shader identifier. - - tableIndex: Hit group shader record index. + - table_index: Hit group shader record index. }]; } @@ -276,16 +282,16 @@ def GpurtSetTriangleIntersectionAttributesOp def GpurtSetHitTriangleNodePointerOp : GpurtOp<"set.hit.triangle.node.pointer", [Memory<[(write InaccessibleMem)]>, WillReturn]> { - let arguments = (ins I64:$bvhAddress, I32:$nodePointer); + let arguments = (ins I64:$bvh_address, I32:$node_pointer); let results = (outs); let summary = "Set hit triangle node pointer"; let description = [{ Compiler notification of hit triangle node pointer. - - bvhAddress: The BVH address. + - bvh_address: The BVH address. - - nodePointer: Node pointer of hit triangle. + - node_pointer: Node pointer of hit triangle. }]; } diff --git a/shared/continuations/include/lgc/LgcRtDialect.td b/shared/continuations/include/lgc/LgcRtDialect.td index 2f88c5440e..c0da863447 100644 --- a/shared/continuations/include/lgc/LgcRtDialect.td +++ b/shared/continuations/include/lgc/LgcRtDialect.td @@ -85,13 +85,13 @@ def AcceptHitAndEndSearchOp : LgcRtOp<"accept.hit.and.end.search", [Memory<[(wri // ========================================================================================================= def CallCallableShaderOp : LgcRtOp<"call.callable.shader", [Memory<[(readwrite InaccessibleMem), (readwrite ArgMem)]>, WillReturn]> { - let arguments = (ins I32:$shaderIndex, PointerType:$param, AttrI32:$paramDataSizeBytes); + let arguments = (ins I32:$shader_index, PointerType:$param, AttrI32:$param_data_size_bytes); let results = (outs); let summary = "Call a ray-tracing callable shader"; let description = [{ Call the ray-tracing callable shader at the specified index in the callable shader table, - passing the parameter in and out. $paramDataSizeBytes refers to the size of the data + passing the parameter in and out. $param_data_size_bytes refers to the size of the data pointed to by $param, given in bytes. }]; } @@ -256,7 +256,7 @@ def RayTminOp : LgcRtOp<"ray.tmin", [Memory<[]>, WillReturn]> { // ========================================================================================================= def ReportHitOp : LgcRtOp<"report.hit", [Memory<[(write InaccessibleMem), (read ArgMem)]>]> { - let arguments = (ins F32:$thit, I32:$hitKind, PointerType:$attributes, AttrI32:$size); + let arguments = (ins F32:$thit, I32:$hit_kind, PointerType:$attributes, AttrI32:$size); let results = (outs I1:$result); let summary = "Report a hit in an intersection shader"; @@ -266,7 +266,7 @@ def ReportHitOp : LgcRtOp<"report.hit", [Memory<[(write InaccessibleMem), (read - thit: Parametric distance of the intersection. - - hitKind: User-specified hit kind in the range 0-127, visible to anyHit and closestHit shaders using + - hit_kind: User-specified hit kind in the range 0-127, visible to anyHit and closestHit shaders using HitKindOp. - attributes: User-defined intersection attributes. @@ -291,14 +291,14 @@ def ShaderIndexOp : LgcRtOp<"shader.index", [Memory<[]>, WillReturn]> { // ========================================================================================================= def ShaderRecordBufferOp : LgcRtOp<"shader.record.buffer", [Memory<[]>, WillReturn]> { - let arguments = (ins I32:$shaderIndex); + let arguments = (ins I32:$shader_index); let results = (outs GlobalPointer:$ptr); let summary = "Return the pointer to shader record buffer"; let description = [{ Return the pointer to shader record buffer. 
- - shaderIndex: The value used to index into the shader binding table. + - shader_index: The value used to index into the shader binding table. }]; } diff --git a/shared/continuations/lib/GpurtContext.cpp b/shared/continuations/lib/GpurtContext.cpp new file mode 100644 index 0000000000..c525d57cb3 --- /dev/null +++ b/shared/continuations/lib/GpurtContext.cpp @@ -0,0 +1,40 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2020-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ +/** + *********************************************************************************************************************** + * @file GpurtContext.cpp + * @brief LLVMContext extension that stores a GPURT library module + *********************************************************************************************************************** + */ + +#include "continuations/GpurtContext.h" +#include "llvm/IR/Module.h" + +using namespace llvm; +using namespace lgc; + +GpurtContext::Key GpurtContext::theKey; + +GpurtContext::~GpurtContext() = default; diff --git a/shared/continuations/lib/LowerRaytracingPipeline.cpp b/shared/continuations/lib/LowerRaytracingPipeline.cpp index 3ed308566e..498ea09906 100644 --- a/shared/continuations/lib/LowerRaytracingPipeline.cpp +++ b/shared/continuations/lib/LowerRaytracingPipeline.cpp @@ -286,6 +286,7 @@ class LowerRaytracingPipelinePassImpl final { /// Calls to hlsl intrinsics that cannot be rematerialized SmallVector IntrinsicCalls; SmallVector ShaderIndexCalls; + SmallVector ShaderRecordBufferCalls; /// Pointer to the alloca'd system data object in this function AllocaInst *SystemData = nullptr; @@ -331,6 +332,7 @@ class LowerRaytracingPipelinePassImpl final { void replaceReportHitCall(FunctionData &Data, CallInst *Call); void replaceShaderIndexCall(FunctionData &Data, CallInst *Call); + void replaceShaderRecordBufferCall(FunctionData &Data, CallInst *Call); void handleGetFuncAddr(Function &Func); void handleGetShaderKind(Function &Func); @@ -433,6 +435,8 @@ class LowerRaytracingPipelinePassImpl final { Function *CallShader; Function *ReportHit; Function *AcceptHit; + Function *GetSbtAddress; + Function *GetSbtStride; Function *RegisterBufferSetPointerBarrier; }; @@ -969,6 +973,43 @@ 
void LowerRaytracingPipelinePassImpl::replaceShaderIndexCall(FunctionData &Data, Call->eraseFromParent(); } +/// Replace a call to lgc.rt.shader.record.buffer with loading the resource. +void LowerRaytracingPipelinePassImpl::replaceShaderRecordBufferCall( + FunctionData &Data, CallInst *Call) { + auto shaderRecordBufferOp = cast(Call); + auto tableIndex = shaderRecordBufferOp->getShaderIndex(); + + assert(GetSbtAddress && "Could not find GetSbtAddress function"); + assert(GetSbtStride && "Could not find GetSbtStride function"); + + Value *tableAddr = + CrossInliner.inlineCall(Builder, GetSbtAddress).returnValue; + Value *tableStride = + CrossInliner.inlineCall(Builder, GetSbtStride).returnValue; + + // SBT starts with shader group handle (aka shader identifier), which is 32 + // bytes, then the data for shader record buffer. + constexpr unsigned ShaderIdEntrySizeInBytes = 32; + Value *shaderIdsSizeVal = Builder.getInt32(ShaderIdEntrySizeInBytes); + + // Byte offset = (tableStride * tableIndex) + shaderIdsSize + Value *offset = Builder.CreateMul(tableIndex, tableStride); + offset = Builder.CreateAdd(offset, shaderIdsSizeVal); + + // Zero-extend offset value to 64 bit + offset = Builder.CreateZExt(offset, Builder.getInt64Ty()); + + // Final addr + tableAddr = Builder.CreateAdd(tableAddr, offset); + + Type *gpuAddrAsPtrTy = + PointerType::get(Builder.getContext(), 1 /* ADDR_SPACE_GLOBAL */); + tableAddr = Builder.CreateIntToPtr(tableAddr, gpuAddrAsPtrTy); + + Call->replaceAllUsesWith(tableAddr); + Call->eraseFromParent(); +} + void LowerRaytracingPipelinePassImpl::handleGetFuncAddr(Function &Func) { assert(Func.arg_empty() // returns i64 or i32 @@ -1840,6 +1881,12 @@ void LowerRaytracingPipelinePassImpl::processFunction(Function *F, for (auto *Call : Data.ShaderIndexCalls) replaceShaderIndexCall(Data, Call); + // Replace ShaderRecordBufferOp calls + for (auto *Call : Data.ShaderRecordBufferCalls) { + Builder.SetInsertPoint(&*++Call->getIterator()); + replaceShaderRecordBufferCall(Data, Call); + } + // Replace non-rematerializable intrinsic calls for (auto *Call : Data.IntrinsicCalls) replaceIntrinsicCall(Builder, Data.SystemDataTy, Data.SystemData, Data.Kind, @@ -1977,7 +2024,7 @@ void LowerRaytracingPipelinePassImpl::handleUnrematerializableCandidates() { static const llvm_dialects::OpSet NonRematerializableDialectOps = llvm_dialects::OpSet::get(); + ShaderIndexOp, ShaderRecordBufferOp>(); if (!NonRematerializableDialectOps.contains(Func)) { llvm::forEachCall(Func, [&](llvm::CallInst &CInst) { auto Data = ToProcess.find(CInst.getFunction()); @@ -2071,6 +2118,15 @@ void LowerRaytracingPipelinePassImpl::collectGpuRtFunctions() { AcceptHit->arg_size() == 1 // Traversal data && AcceptHit->getFunctionType()->getParamType(0)->isPointerTy()); + + GetSbtAddress = GpurtLibrary->getFunction("_cont_GetSbtAddress"); + if (GetSbtAddress) + assert(GetSbtAddress->getReturnType()->isIntegerTy(64) && + GetSbtAddress->arg_empty()); + GetSbtStride = GpurtLibrary->getFunction("_cont_GetSbtStride"); + if (GetSbtStride) + assert(GetSbtStride->getReturnType()->isIntegerTy(32) && + GetSbtStride->arg_empty()); } LowerRaytracingPipelinePassImpl::LowerRaytracingPipelinePassImpl( @@ -2095,8 +2151,9 @@ bool LowerRaytracingPipelinePassImpl::run() { static const auto Visitor = llvm_dialects::VisitorBuilder() .setStrategy(llvm_dialects::VisitorStrategy::ByInstruction) - .addSet([](VisitorState &State, Instruction &Op) { + .addSet([](VisitorState &State, + Instruction &Op) { auto *CInst = cast(&Op); auto Data = 
State.Processables.find(CInst->getFunction()); if (Data == State.Processables.end()) @@ -2107,6 +2164,11 @@ bool LowerRaytracingPipelinePassImpl::run() { return; } + if (isa(Op)) { + Data->second.ShaderRecordBufferCalls.push_back(CInst); + return; + } + Type *PayloadTy = ContHelper::getPayloadTypeFromMetadata(*CInst); if (!isa(Op)) { diff --git a/shared/continuations/test/dx/lower-rt-pipeline-small-payload-field.ll.hlsl b/shared/continuations/test/dx/lower-rt-pipeline-small-payload-field.ll.hlsl index 994e8768dc..fad4f37c43 100644 --- a/shared/continuations/test/dx/lower-rt-pipeline-small-payload-field.ll.hlsl +++ b/shared/continuations/test/dx/lower-rt-pipeline-small-payload-field.ll.hlsl @@ -1,14 +1,9 @@ // This file is not a test itself, but used to generate the .ll test file. -struct [raypayload] Payload -{ - int v[5] : write(caller) : read(miss, caller); - min16uint smallField : write(miss) : read(caller); - min16uint3 smallFieldVec : write(miss) : read(caller); +struct[raypayload] Payload { + int v[5] : write(caller) : read(miss, caller); + min16uint smallField : write(miss) : read(caller); + min16uint3 smallFieldVec : write(miss) : read(caller); }; -[shader("miss")] -void Miss(inout Payload payload) -{ - payload.smallField = 17; -} +[shader("miss")] void Miss(inout Payload payload) { payload.smallField = 17; } diff --git a/shared/continuations/test/dx/payload-caller-in-paq.ll.hlsl b/shared/continuations/test/dx/payload-caller-in-paq.ll.hlsl index eed36ecfc6..ea08c13a60 100644 --- a/shared/continuations/test/dx/payload-caller-in-paq.ll.hlsl +++ b/shared/continuations/test/dx/payload-caller-in-paq.ll.hlsl @@ -1,37 +1,23 @@ // This file is not a test, rather it was used to generate // payload_caller_in_paq.ll and is kept so the .ll file can be re-generated. -struct [raypayload] MyPayload -{ - float v1 : write(caller) : read(caller); - int v2 : write(closesthit) : read(caller); - double v3 : write(miss) : read(caller); +struct[raypayload] MyPayload { + float v1 : write(caller) : read(caller); + int v2 : write(closesthit) : read(caller); + double v3 : write(miss) : read(caller); }; RaytracingAccelerationStructure myAccelerationStructure : register(t3); RWTexture2D gOutput : register(u0); -[shader("raygeneration")] -void RayGen() -{ - MyPayload payload; - payload.v1 = 1.0; +[shader("raygeneration")] void RayGen() { + MyPayload payload; + payload.v1 = 1.0; - RayDesc myRay = { - float3(0., 0., 0.), - 0., - float3(0., 0., 0.), - 1.0}; + RayDesc myRay = {float3(0., 0., 0.), 0., float3(0., 0., 0.), 1.0}; - TraceRay( - myAccelerationStructure, - 0, - 0, - 0, - 0, - 0, - myRay, - payload); + TraceRay(myAccelerationStructure, 0, 0, 0, 0, 0, myRay, payload); - gOutput[DispatchRaysIndex().xy] = float4(payload.v1, payload.v2, payload.v3, 0.); + gOutput[DispatchRaysIndex().xy] = + float4(payload.v1, payload.v2, payload.v3, 0.); } diff --git a/shared/continuations/test/dx/payload-save-registers.ll.hlsl b/shared/continuations/test/dx/payload-save-registers.ll.hlsl index 1cac156af5..0baf77eb28 100644 --- a/shared/continuations/test/dx/payload-save-registers.ll.hlsl +++ b/shared/continuations/test/dx/payload-save-registers.ll.hlsl @@ -1,52 +1,34 @@ // This file is not a test itself, but used to generate the .ll test file. -struct [raypayload] OuterPayload -{ - // These are written in miss, so they are not saved before recursive - // TraceRay in miss - float v1[15] : write(caller,miss) : read(caller,miss); - // These need to be saved before recursive TraceRay. 
-    // However, these are only partially in registers,
-    // so are only saved partially. The memory part does not need
-    // to be saved.
-    float v2[15] : write(caller) : read(caller);
+struct[raypayload] OuterPayload {
+  // These are written in miss, so they are not saved before recursive
+  // TraceRay in miss
+  float v1[15] : write(caller, miss) : read(caller, miss);
+  // These need to be saved before recursive TraceRay.
+  // However, these are only partially in registers,
+  // so are only saved partially. The memory part does not need
+  // to be saved.
+  float v2[15] : write(caller) : read(caller);
 };
 
-struct [raypayload] InnerPayload
-{
-    float v1 : write(caller) : read(caller);
+struct[raypayload] InnerPayload {
+  float v1 : write(caller) : read(caller);
 };
 
 RaytracingAccelerationStructure myAccelerationStructure : register(t3);
 RWTexture2D<float4> gOutput : register(u0);
 
-[shader("miss")]
-void Miss(inout OuterPayload outerPayload)
-{
-    InnerPayload innerPayload;
-    innerPayload.v1 = outerPayload.v1[14];
-
-    RayDesc myRay = {
-        float3(0., 0., 0.),
-        0.,
-        float3(0., 0., 0.),
-        1.0};
-
-    TraceRay(
-        myAccelerationStructure,
-        0,
-        0,
-        0,
-        0,
-        0,
-        myRay,
-        innerPayload);
-
-    outerPayload.v1[14] = innerPayload.v1;
+[shader("miss")] void Miss(inout OuterPayload outerPayload) {
+  InnerPayload innerPayload;
+  innerPayload.v1 = outerPayload.v1[14];
+
+  RayDesc myRay = {float3(0., 0., 0.), 0., float3(0., 0., 0.), 1.0};
+
+  TraceRay(myAccelerationStructure, 0, 0, 0, 0, 0, myRay, innerPayload);
+
+  outerPayload.v1[14] = innerPayload.v1;
 }
 
-[shader("callable")]
-void callable(inout OuterPayload outerPayload)
-{
-    CallShader(0, outerPayload);
+    [shader("callable")] void callable(inout OuterPayload outerPayload) {
+  CallShader(0, outerPayload);
 }
diff --git a/tool/vfx/CMakeLists.txt b/tool/vfx/CMakeLists.txt
index 66b73686e5..56d8c235fb 100644
--- a/tool/vfx/CMakeLists.txt
+++ b/tool/vfx/CMakeLists.txt
@@ -33,6 +33,10 @@ option(VFX_ENABLE_WERROR "Build ${PROJECT_NAME} with more errors" OFF)
 
 target_compile_definitions(vfx PRIVATE ${TARGET_ARCHITECTURE_ENDIANESS}ENDIAN_CPU)
 
+if (LLPC_DISABLE_SPVGEN)
+  target_compile_definitions(vfx PRIVATE VFX_DISABLE_SPVGEN=1)
+endif()
+
 target_sources(vfx PRIVATE
     vfxParser.cpp
     vfxPipelineDoc.cpp
@@ -45,7 +49,10 @@ target_compile_definitions(vfx PRIVATE VFX_SUPPORT_VK_PIPELINE SH_EXPORTING)
 
 target_include_directories(vfx PUBLIC ${PROJECT_SOURCE_DIR})
 
-target_link_libraries(vfx PRIVATE spvgen_static vkgc_headers khronos_vulkan_interface khronos_spirv_interface)
+target_link_libraries(vfx PRIVATE vkgc_headers khronos_vulkan_interface khronos_spirv_interface)
+if (NOT LLPC_DISABLE_SPVGEN)
+  target_link_libraries(vfx PRIVATE spvgen_static)
+endif()
 
 if(ICD_BUILD_LLPC)
   target_compile_definitions(vfx PRIVATE ICD_BUILD_LLPC)
diff --git a/tool/vfx/vfxSection.cpp b/tool/vfx/vfxSection.cpp
index fb44ad54cd..d4d9e6bc1d 100644
--- a/tool/vfx/vfxSection.cpp
+++ b/tool/vfx/vfxSection.cpp
@@ -108,9 +108,7 @@ static ParserInit Init;
 // @param sectionName : Name of this section.
 Section::Section(StrToMemberAddrArrayRef addrTable, SectionType sectionType, const char *sectionName)
     : m_sectionType(sectionType), m_sectionName(sectionName), m_lineNum(0), m_memberTable(addrTable.data),
-      m_tableSize(addrTable.size), m_isActive(false){
-
-      };
+      m_tableSize(unsigned(addrTable.size)), m_isActive(false){};
 
 // =====================================================================================================================
 // Initializes static variable m_sectionInfo
diff --git a/util/extensions.txt b/util/extensions.txt
index d65c1fef5c..97eef1c0b3 100644
--- a/util/extensions.txt
+++ b/util/extensions.txt
@@ -47,3 +47,9 @@ SPV_KHR_cooperative_matrix
 #endif
 SPV_NV_shader_atomic_float
 SPV_NV_compute_shader_derivatives
+SPV_KHR_maximal_reconvergence
+SPV_KHR_expect_assume
+#if VKI_KHR_SHADER_QUAD_CONTROL
+SPV_KHR_shader_quad_control
+#endif
+SPV_KHR_subgroup_rotate
diff --git a/util/vkgcCapability.h b/util/vkgcCapability.h
index 4a35f27642..97d66b65db 100644
--- a/util/vkgcCapability.h
+++ b/util/vkgcCapability.h
@@ -162,6 +162,9 @@ static const char *const VkgcSupportedCapabilities[] = {
     "CapabilityCooperativeMatrixKHR",
     "CapabilityComputeDerivativeGroupLinearNV",
     "CapabilityComputeDerivativeGroupQuadsNV",
+    "CapabilityExpectAssumeKHR",
+    "CapabilityQuadControl",
+    "CapabilityGroupNonUniformRotateKHR",
 };
 
 }; // namespace Vkgc
diff --git a/util/vkgcExtension.cpp b/util/vkgcExtension.cpp
index acfe1af16f..3c6becfecf 100644
--- a/util/vkgcExtension.cpp
+++ b/util/vkgcExtension.cpp
@@ -99,6 +99,10 @@ const ExtensionNamePair ExtensionNameTable[ExtensionCount] = {
     DeclExtensionName(KHR_RAY_QUERY),
     DeclExtensionName(NV_SHADER_ATOMIC_FLOAT),
    DeclExtensionName(NV_COMPUTE_SHADER_DERIVATIVES),
+    DeclExtensionName(KHR_MAXIMAL_RECONVERGENCE),
+    DeclExtensionName(KHR_EXPECT_ASSUME),
+    DeclExtensionName(KHR_SHADER_QUAD_CONTROL),
+    DeclExtensionName(KHR_SUBGROUP_ROTATE),
 };
 
 // =====================================================================================================================
diff --git a/util/vkgcExtension.h b/util/vkgcExtension.h
index 0da87c3e0b..505c3d6f49 100644
--- a/util/vkgcExtension.h
+++ b/util/vkgcExtension.h
@@ -81,6 +81,10 @@ enum Extension : unsigned {
   KHR_RAY_QUERY,
   NV_SHADER_ATOMIC_FLOAT,
   NV_COMPUTE_SHADER_DERIVATIVES,
+  KHR_MAXIMAL_RECONVERGENCE,
+  KHR_EXPECT_ASSUME,
+  KHR_SHADER_QUAD_CONTROL,
+  KHR_SUBGROUP_ROTATE,
 
   ExtensionCount,
 };