From abe957e2622ce364d6ff303127c2aa2add61e445 Mon Sep 17 00:00:00 2001 From: Rex Xu Date: Fri, 15 Dec 2023 14:29:36 +0800 Subject: [PATCH] Remove the unused user data EsGsLdsSize This user data was added to keep some compatibility with old PAL. At that time, PAL expected this user data to handle ES-GS merged shader and NGG correctly on GFX9 and GFX10.1. But LLPC doesn't need this user data at all since we always know ES stage when building the pipeline. And the LDS calculation and allocation of GS stage are applied to full ES-GS pipeline. In this change, the user data EsGsLdsSize is not added to entry-point argument. This SGPR is saved. Also, we don't need to tell PAL with metadata EsGsLdsSize. --- lgc/include/lgc/state/IntrinsDefs.h | 10 ++--- lgc/include/lgc/state/ResourceUsage.h | 1 - lgc/patch/ConfigBuilderBase.cpp | 19 ---------- lgc/patch/ConfigBuilderBase.h | 2 - lgc/patch/Gfx9ConfigBuilder.cpp | 4 -- lgc/patch/PatchCopyShader.cpp | 34 ++++------------- lgc/patch/PatchEntryPointMutate.cpp | 38 ------------------- lgc/patch/PatchResourceCollect.cpp | 3 +- lgc/patch/RegisterMetadataBuilder.cpp | 3 -- lgc/patch/ShaderInputs.cpp | 2 - .../shaderdb/gfx11/SgprUserDataInit_Fs.pipe | 6 +-- 11 files changed, 16 insertions(+), 106 deletions(-) diff --git a/lgc/include/lgc/state/IntrinsDefs.h b/lgc/include/lgc/state/IntrinsDefs.h index 05b2920fba..c2300a1b65 100644 --- a/lgc/include/lgc/state/IntrinsDefs.h +++ b/lgc/include/lgc/state/IntrinsDefs.h @@ -65,19 +65,19 @@ static const unsigned GsEmitCutStreamIdMask = 0x300; // Mask of STREAM_ID of the static const unsigned GetRealTime = 0x83; // [7] = 1, [6:0] = 3 // Count of user SGPRs used in copy shader -static const unsigned CopyShaderUserSgprCount = 3; +static const unsigned CopyShaderUserSgprCount = 2; // Entry-point argument index for the stream info in copy shader -static const unsigned CopyShaderEntryArgIdxStreamInfo = 3; +static const unsigned CopyShaderEntryArgIdxStreamInfo = 2; // Entry-point argument index for the stream-out write index in copy shader -static const unsigned CopyShaderEntryArgIdxWriteIndex = 4; +static const unsigned CopyShaderEntryArgIdxWriteIndex = 3; // Entry-point argument index for the stream offsets in copy shader -static const unsigned CopyShaderEntryArgIdxStreamOffset = 5; +static const unsigned CopyShaderEntryArgIdxStreamOffset = 4; // Entry-point argument index for the LDS offset of current vertices in GS-VS ring -static const unsigned CopyShaderEntryArgIdxVertexOffset = 9; +static const unsigned CopyShaderEntryArgIdxVertexOffset = 8; // Enumerates address spaces valid for AMD GPU (similar to LLVM header AMDGPU.h) enum AddrSpace { diff --git a/lgc/include/lgc/state/ResourceUsage.h b/lgc/include/lgc/state/ResourceUsage.h index c6b2c316be..7522eeb0c7 100644 --- a/lgc/include/lgc/state/ResourceUsage.h +++ b/lgc/include/lgc/state/ResourceUsage.h @@ -513,7 +513,6 @@ struct InterfaceData { struct { // Geometry shader struct { - unsigned copyShaderEsGsLdsSize; // ES -> GS ring LDS size (for copy shader) unsigned copyShaderStreamOutTable; // Stream-out table (for copy shader) } gs; diff --git a/lgc/patch/ConfigBuilderBase.cpp b/lgc/patch/ConfigBuilderBase.cpp index 9806f88c91..721bb2b2ec 100644 --- a/lgc/patch/ConfigBuilderBase.cpp +++ b/lgc/patch/ConfigBuilderBase.cpp @@ -208,14 +208,6 @@ void ConfigBuilderBase::setPsSampleMask(bool value) { m_pipelineNode[Util::Abi::PipelineMetadataKey::PsSampleMask] = value; } -// ===================================================================================================================== -// Set ES_GS_LDS_BYTE_SIZE -// -// @param value : Value to set -void ConfigBuilderBase::setEsGsLdsByteSize(unsigned value) { - m_pipelineNode[Util::Abi::PipelineMetadataKey::EsGsLdsSize] = value; -} - // ===================================================================================================================== // Set hardware stage wavefront // @@ -289,17 +281,6 @@ void ConfigBuilderBase::setLdsSizeByteSize(Util::Abi::HardwareStage hwStage, uns hwShaderNode[Util::Abi::HardwareStageMetadataKey::LdsSize] = value; } -// ===================================================================================================================== -// Set ES-GS LDS byte size -// -// @param value : Value to set -void ConfigBuilderBase::setEsGsLdsSize(unsigned value) { - if (value == 0) - return; // Optional - - m_pipelineNode[Util::Abi::PipelineMetadataKey::EsGsLdsSize] = value; -} - // ===================================================================================================================== // Set NGG subgroup size // diff --git a/lgc/patch/ConfigBuilderBase.h b/lgc/patch/ConfigBuilderBase.h index 4266e4b77e..0e822d1da6 100644 --- a/lgc/patch/ConfigBuilderBase.h +++ b/lgc/patch/ConfigBuilderBase.h @@ -80,12 +80,10 @@ class ConfigBuilderBase { void setPsWritesUavs(bool value); void setPsWritesDepth(bool value); void setPsSampleMask(bool value); - void setEsGsLdsByteSize(unsigned value); void setWaveFrontSize(Util::Abi::HardwareStage hwStage, unsigned value); void setApiName(const char *value); void setPipelineType(Util::Abi::PipelineType value); void setLdsSizeByteSize(Util::Abi::HardwareStage hwStage, unsigned value); - void setEsGsLdsSize(unsigned value); void setNggSubgroupSize(unsigned value); void setThreadgroupDimensions(llvm::ArrayRef values); void setStreamOutVertexStrides(llvm::ArrayRef values); diff --git a/lgc/patch/Gfx9ConfigBuilder.cpp b/lgc/patch/Gfx9ConfigBuilder.cpp index aaf3c3d29c..61faaf0830 100644 --- a/lgc/patch/Gfx9ConfigBuilder.cpp +++ b/lgc/patch/Gfx9ConfigBuilder.cpp @@ -1151,7 +1151,6 @@ void ConfigBuilder::buildEsGsRegConfig(ShaderStage shaderStage1, ShaderStage sha SET_REG_FIELD(&config->esGsRegs, SPI_SHADER_PGM_RSRC2_GS, LDS_SIZE, ldsSize); setLdsSizeByteSize(Util::Abi::HardwareStage::Gs, ldsSizeInDwords * 4); - setEsGsLdsSize(calcFactor.esGsLdsSize * 4); unsigned maxVertOut = std::max(1u, static_cast(geometryMode.outputVertices)); SET_REG_FIELD(&config->esGsRegs, VGT_GS_MAX_VERT_OUT, MAX_VERT_OUT, maxVertOut); @@ -1163,8 +1162,6 @@ void ConfigBuilder::buildEsGsRegConfig(ShaderStage shaderStage1, ShaderStage sha SET_REG_FIELD(&config->esGsRegs, VGT_GS_MODE, ONCHIP, VGT_GS_MODE_ONCHIP_ON); SET_REG_FIELD(&config->esGsRegs, VGT_GS_MODE, ES_WRITE_OPTIMIZE, false); SET_REG_FIELD(&config->esGsRegs, VGT_GS_MODE, GS_WRITE_OPTIMIZE, false); - - setEsGsLdsByteSize(calcFactor.esGsLdsSize * 4); } else { SET_REG_FIELD(&config->esGsRegs, VGT_GS_MODE, ONCHIP, VGT_GS_MODE_ONCHIP_OFF); SET_REG_FIELD(&config->esGsRegs, VGT_GS_MODE, ES_WRITE_OPTIMIZE, false); @@ -1366,7 +1363,6 @@ void ConfigBuilder::buildPrimShaderRegConfig(ShaderStage shaderStage1, ShaderSta const unsigned ldsSize = ldsSizeInDwords >> ldsSizeDwordGranularityShift; SET_REG_FIELD(&config->primShaderRegs, SPI_SHADER_PGM_RSRC2_GS, LDS_SIZE, ldsSize); setLdsSizeByteSize(Util::Abi::HardwareStage::Gs, ldsSizeInDwords * 4); - setEsGsLdsSize(calcFactor.esGsLdsSize * 4); if (m_gfxIp.major >= 11) { // Pixel wait sync+ diff --git a/lgc/patch/PatchCopyShader.cpp b/lgc/patch/PatchCopyShader.cpp index 4829de1a27..d109a44c62 100644 --- a/lgc/patch/PatchCopyShader.cpp +++ b/lgc/patch/PatchCopyShader.cpp @@ -45,14 +45,6 @@ #define DEBUG_TYPE "lgc-patch-copy-shader" -namespace llvm { -namespace cl { - -extern opt InRegEsGsLdsSize; - -} // namespace cl -} // namespace llvm - using namespace lgc; using namespace llvm; @@ -111,7 +103,6 @@ bool PatchCopyShader::runImpl(Module &module, PipelineShadersResult &pipelineSha // // void copyShader( // i32 inreg globalTable, - // i32 inreg esGsLdsSize (GFX9+), // i32 inreg streamOutTable (GFX9+), // i32 inreg streamOutInfo, // i32 inreg streamOutWriteIndex, @@ -122,12 +113,11 @@ bool PatchCopyShader::runImpl(Module &module, PipelineShadersResult &pipelineSha // i32 vertexOffset) // - argTys = {int32Ty, int32Ty, int32Ty, int32Ty, int32Ty, int32Ty, int32Ty, int32Ty, int32Ty, int32Ty}; + argTys = {int32Ty, int32Ty, int32Ty, int32Ty, int32Ty, int32Ty, int32Ty, int32Ty, int32Ty}; - argInReg = {true, true, true, true, true, true, true, true, true, false}; + argInReg = {true, true, true, true, true, true, true, true, false}; // clang-format off argNames = {"globalTable", - "esGsLdsSize", "streamOutTable", "streamOutInfo", "streamOutWriteIndex", @@ -149,15 +139,15 @@ bool PatchCopyShader::runImpl(Module &module, PipelineShadersResult &pipelineSha // GFX11+: // void copyShader( // i32 inreg globalTable, - // i32 vertexId) + // i32 vertexIndex) if (m_pipelineState->getTargetInfo().getGfxIpVersion().major <= 10) { argTys = {int32Ty}; argInReg = {false}; - argNames = {"vertexId"}; + argNames = {"vertexIndex"}; } else { argTys = {int32Ty, int32Ty}; argInReg = {true, false}; - argNames = {"globalTable", "vertexId"}; + argNames = {"globalTable", "vertexIndex"}; } } @@ -201,12 +191,9 @@ bool PatchCopyShader::runImpl(Module &module, PipelineShadersResult &pipelineSha auto intfData = m_pipelineState->getShaderInterfaceData(ShaderStageCopyShader); if (!m_pipelineState->getNggControl()->enableNgg) { - // For GFX9+, streamOutTable SGPR index value should be greater than esGsLdsSize - intfData->userDataUsage.gs.copyShaderEsGsLdsSize = 1; - intfData->userDataUsage.gs.copyShaderStreamOutTable = 2; + intfData->userDataUsage.gs.copyShaderStreamOutTable = 1; } else { - // If NGG, both esGsLdsSize and streamOutTable are not used - intfData->userDataUsage.gs.copyShaderEsGsLdsSize = InvalidValue; + // If NGG, streamOutTable is not used intfData->userDataUsage.gs.copyShaderStreamOutTable = InvalidValue; } @@ -221,9 +208,6 @@ bool PatchCopyShader::runImpl(Module &module, PipelineShadersResult &pipelineSha if (m_pipelineState->enableXfb()) userData[intfData->userDataUsage.gs.copyShaderStreamOutTable] = static_cast(UserDataMapping::StreamOutTable); - if (cl::InRegEsGsLdsSize && m_pipelineState->isGsOnChip()) - userData[intfData->userDataUsage.gs.copyShaderEsGsLdsSize] = - static_cast(UserDataMapping::EsGsLdsSize); m_pipelineState->setUserDataMap(ShaderStageCopyShader, userData); } else { m_pipelineState->getPalMetadata()->setUserDataEntry(ShaderStageCopyShader, 0, UserDataMapping::GlobalTable); @@ -232,10 +216,6 @@ bool PatchCopyShader::runImpl(Module &module, PipelineShadersResult &pipelineSha intfData->userDataUsage.gs.copyShaderStreamOutTable, UserDataMapping::StreamOutTable); } - if (cl::InRegEsGsLdsSize && m_pipelineState->isGsOnChip()) { - m_pipelineState->getPalMetadata()->setUserDataEntry( - ShaderStageCopyShader, intfData->userDataUsage.gs.copyShaderEsGsLdsSize, UserDataMapping::EsGsLdsSize); - } } } diff --git a/lgc/patch/PatchEntryPointMutate.cpp b/lgc/patch/PatchEntryPointMutate.cpp index 9e4073c9cb..70a594750f 100644 --- a/lgc/patch/PatchEntryPointMutate.cpp +++ b/lgc/patch/PatchEntryPointMutate.cpp @@ -82,16 +82,6 @@ using namespace llvm; using namespace lgc; -namespace llvm { -namespace cl { - -// -inreg-esgs-lds-size: Add a dummy "inreg" argument for ES-GS LDS size, this is to keep consistent with PAL's -// GS on-chip behavior. In the future, if PAL allows hardcoded ES-GS LDS size, this option could be deprecated. -opt InRegEsGsLdsSize("inreg-esgs-lds-size", desc("For GS on-chip, add esGsLdsSize in user data"), init(true)); - -} // namespace cl -} // namespace llvm - // ===================================================================================================================== PatchEntryPointMutate::PatchEntryPointMutate() : m_hasTs(false), m_hasGs(false), @@ -1706,34 +1696,6 @@ void PatchEntryPointMutate::addSpecialUserDataArgs(SmallVectorImpl specialUserDataArgs.push_back(UserDataArg(builder.getInt32Ty(), "viewId", userDataValue, argIdx)); } - // NOTE: Add a dummy "inreg" argument for ES-GS LDS size, this is to keep consistent - // with PAL's GS on-chip behavior (VS is in NGG primitive shader). - bool wantEsGsLdsSize = false; - switch (getMergedShaderStage(m_shaderStage)) { - case ShaderStageVertex: - wantEsGsLdsSize = enableNgg; - break; - case ShaderStageTessControl: - wantEsGsLdsSize = false; - break; - case ShaderStageTessEval: - wantEsGsLdsSize = enableNgg; - break; - case ShaderStageGeometry: - wantEsGsLdsSize = (m_pipelineState->isGsOnChip() && cl::InRegEsGsLdsSize) || enableNgg; - break; - default: - llvm_unreachable("Unexpected shader stage"); - } - if (wantEsGsLdsSize) { - auto userDataValue = UserDataMapping::EsGsLdsSize; - // For a standalone TCS (which can only happen in unit testing, not in a real pipeline), don't add - // the PAL metadata for it, for consistency with the old code. - if (m_shaderStage == ShaderStageVertex && !m_pipelineState->hasShaderStage(ShaderStageVertex)) - userDataValue = UserDataMapping::Invalid; - specialUserDataArgs.push_back(UserDataArg(builder.getInt32Ty(), "esGsLdsSize", userDataValue)); - } - if (getMergedShaderStage(m_shaderStage) == getMergedShaderStage(ShaderStageVertex)) { // This is the VS, or the shader that VS is merged into on GFX9+. auto vsIntfData = m_pipelineState->getShaderInterfaceData(ShaderStageVertex); diff --git a/lgc/patch/PatchResourceCollect.cpp b/lgc/patch/PatchResourceCollect.cpp index d49c2ed82d..9ff449c947 100644 --- a/lgc/patch/PatchResourceCollect.cpp +++ b/lgc/patch/PatchResourceCollect.cpp @@ -727,8 +727,7 @@ bool PatchResourceCollect::checkGsOnChipValidity() { gsResUsage->inOutUsage.gs.calcFactor.esVertsPerSubgroup = esVertsPerSubgroup; gsResUsage->inOutUsage.gs.calcFactor.gsPrimsPerSubgroup = gsPrimsPerSubgroup; - // EsGsLdsSize is passed in a user data SGPR to the merged shader so that the API GS knows where to start - // reading out of LDS. EsGsLdsSize is unnecessary when there is no API GS. + // EsGsLdsSize is unnecessary when there is no API GS. gsResUsage->inOutUsage.gs.calcFactor.esGsLdsSize = hasGs ? expectedEsLdsSize : 0; gsResUsage->inOutUsage.gs.calcFactor.gsOnChipLdsSize = needsLds ? ldsSizeDwords : 0; diff --git a/lgc/patch/RegisterMetadataBuilder.cpp b/lgc/patch/RegisterMetadataBuilder.cpp index 7be14870b3..4652b335e4 100644 --- a/lgc/patch/RegisterMetadataBuilder.cpp +++ b/lgc/patch/RegisterMetadataBuilder.cpp @@ -402,7 +402,6 @@ void RegisterMetadataBuilder::buildEsGsRegisters() { auto hwShaderNode = getHwShaderNode(Util::Abi::HardwareStage::Gs); hwShaderNode[Util::Abi::HardwareStageMetadataKey::LdsSize] = calcLdsSize(ldsSizeInDwords); - setEsGsLdsSize(calcFactor.esGsLdsSize * 4); } // ===================================================================================================================== @@ -673,8 +672,6 @@ void RegisterMetadataBuilder::buildPrimShaderRegisters() { auto hwShaderNode = getHwShaderNode(Util::Abi::HardwareStage::Gs); hwShaderNode[Util::Abi::HardwareStageMetadataKey::LdsSize] = calcLdsSize(ldsSizeInDwords); - if (!m_hasMesh) - setEsGsLdsSize(calcFactor.esGsLdsSize * 4); } // ===================================================================================================================== diff --git a/lgc/patch/ShaderInputs.cpp b/lgc/patch/ShaderInputs.cpp index 5f437a0ee3..4b411a3ce4 100644 --- a/lgc/patch/ShaderInputs.cpp +++ b/lgc/patch/ShaderInputs.cpp @@ -57,8 +57,6 @@ const char *ShaderInputs::getSpecialUserDataName(UserDataMapping kind) { return "DrawIndex"; case UserDataMapping::Workgroup: return "Workgroup"; - case UserDataMapping::EsGsLdsSize: - return "EsGsLdsSize"; case UserDataMapping::ViewId: return "ViewId"; case UserDataMapping::StreamOutTable: diff --git a/llpc/test/shaderdb/gfx11/SgprUserDataInit_Fs.pipe b/llpc/test/shaderdb/gfx11/SgprUserDataInit_Fs.pipe index 5b2be37628..f179cb677d 100644 --- a/llpc/test/shaderdb/gfx11/SgprUserDataInit_Fs.pipe +++ b/llpc/test/shaderdb/gfx11/SgprUserDataInit_Fs.pipe @@ -302,12 +302,11 @@ colorBuffer[0].blendSrcAlphaToColor = 0 ; CHECK-NEXT: .offchip_lds_en: false ; CHECK-NEXT: .scratch_en: false ; CHECK-NEXT: .scratch_memory_size: 0 -; CHECK-NEXT: .sgpr_count: 0xc +; CHECK-NEXT: .sgpr_count: 0xb ; CHECK-NEXT: .sgpr_limit: 0x6a ; CHECK-NEXT: .trap_present: 0 ; CHECK-NEXT: .user_data_reg_map: ; CHECK-NEXT: - 0x10000000 -; CHECK-NEXT: - 0x1000000a ; CHECK-NEXT: - 0x10000003 ; CHECK-NEXT: - 0x10000004 ; CHECK-NEXT: - 0xffffffff @@ -338,7 +337,8 @@ colorBuffer[0].blendSrcAlphaToColor = 0 ; CHECK-NEXT: - 0xffffffff ; CHECK-NEXT: - 0xffffffff ; CHECK-NEXT: - 0xffffffff -; CHECK-NEXT: .user_sgprs: 0x4 +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: .user_sgprs: 0x3 ; CHECK-NEXT: .vgpr_count: 0x9 ; CHECK-NEXT: .vgpr_limit: 0x100 ; CHECK-NEXT: .wavefront_size: 0x40