Skip to content

Commit

Permalink
Remove the unused user data EsGsLdsSize
Browse files Browse the repository at this point in the history
This user data was added to keep some compatibility with old PAL. At
that time, PAL expected this user data to handle ES-GS merged shader and
NGG correctly on GFX9 and GFX10.1. But LLPC doesn't need this user data
at all since we always know ES stage when building the pipeline. And the
LDS calculation and allocation of GS stage are applied to full ES-GS
pipeline.

In this change, the user data EsGsLdsSize is not added to entry-point
argument. This SGPR is saved. Also, we don't need to tell PAL with
metadata EsGsLdsSize.
  • Loading branch information
amdrexu committed Dec 20, 2023
1 parent dcae6f6 commit abe957e
Show file tree
Hide file tree
Showing 11 changed files with 16 additions and 106 deletions.
10 changes: 5 additions & 5 deletions lgc/include/lgc/state/IntrinsDefs.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,19 +65,19 @@ static const unsigned GsEmitCutStreamIdMask = 0x300; // Mask of STREAM_ID of the
static const unsigned GetRealTime = 0x83; // [7] = 1, [6:0] = 3

// Count of user SGPRs used in copy shader
static const unsigned CopyShaderUserSgprCount = 3;
static const unsigned CopyShaderUserSgprCount = 2;

// Entry-point argument index for the stream info in copy shader
static const unsigned CopyShaderEntryArgIdxStreamInfo = 3;
static const unsigned CopyShaderEntryArgIdxStreamInfo = 2;

// Entry-point argument index for the stream-out write index in copy shader
static const unsigned CopyShaderEntryArgIdxWriteIndex = 4;
static const unsigned CopyShaderEntryArgIdxWriteIndex = 3;

// Entry-point argument index for the stream offsets in copy shader
static const unsigned CopyShaderEntryArgIdxStreamOffset = 5;
static const unsigned CopyShaderEntryArgIdxStreamOffset = 4;

// Entry-point argument index for the LDS offset of current vertices in GS-VS ring
static const unsigned CopyShaderEntryArgIdxVertexOffset = 9;
static const unsigned CopyShaderEntryArgIdxVertexOffset = 8;

// Enumerates address spaces valid for AMD GPU (similar to LLVM header AMDGPU.h)
enum AddrSpace {
Expand Down
1 change: 0 additions & 1 deletion lgc/include/lgc/state/ResourceUsage.h
Original file line number Diff line number Diff line change
Expand Up @@ -513,7 +513,6 @@ struct InterfaceData {
struct {
// Geometry shader
struct {
unsigned copyShaderEsGsLdsSize; // ES -> GS ring LDS size (for copy shader)
unsigned copyShaderStreamOutTable; // Stream-out table (for copy shader)
} gs;

Expand Down
19 changes: 0 additions & 19 deletions lgc/patch/ConfigBuilderBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -208,14 +208,6 @@ void ConfigBuilderBase::setPsSampleMask(bool value) {
m_pipelineNode[Util::Abi::PipelineMetadataKey::PsSampleMask] = value;
}

// =====================================================================================================================
// Set ES_GS_LDS_BYTE_SIZE
//
// @param value : Value to set
void ConfigBuilderBase::setEsGsLdsByteSize(unsigned value) {
m_pipelineNode[Util::Abi::PipelineMetadataKey::EsGsLdsSize] = value;
}

// =====================================================================================================================
// Set hardware stage wavefront
//
Expand Down Expand Up @@ -289,17 +281,6 @@ void ConfigBuilderBase::setLdsSizeByteSize(Util::Abi::HardwareStage hwStage, uns
hwShaderNode[Util::Abi::HardwareStageMetadataKey::LdsSize] = value;
}

// =====================================================================================================================
// Set ES-GS LDS byte size
//
// @param value : Value to set
void ConfigBuilderBase::setEsGsLdsSize(unsigned value) {
if (value == 0)
return; // Optional

m_pipelineNode[Util::Abi::PipelineMetadataKey::EsGsLdsSize] = value;
}

// =====================================================================================================================
// Set NGG subgroup size
//
Expand Down
2 changes: 0 additions & 2 deletions lgc/patch/ConfigBuilderBase.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,12 +80,10 @@ class ConfigBuilderBase {
void setPsWritesUavs(bool value);
void setPsWritesDepth(bool value);
void setPsSampleMask(bool value);
void setEsGsLdsByteSize(unsigned value);
void setWaveFrontSize(Util::Abi::HardwareStage hwStage, unsigned value);
void setApiName(const char *value);
void setPipelineType(Util::Abi::PipelineType value);
void setLdsSizeByteSize(Util::Abi::HardwareStage hwStage, unsigned value);
void setEsGsLdsSize(unsigned value);
void setNggSubgroupSize(unsigned value);
void setThreadgroupDimensions(llvm::ArrayRef<unsigned> values);
void setStreamOutVertexStrides(llvm::ArrayRef<unsigned> values);
Expand Down
4 changes: 0 additions & 4 deletions lgc/patch/Gfx9ConfigBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1151,7 +1151,6 @@ void ConfigBuilder::buildEsGsRegConfig(ShaderStage shaderStage1, ShaderStage sha
SET_REG_FIELD(&config->esGsRegs, SPI_SHADER_PGM_RSRC2_GS, LDS_SIZE, ldsSize);

setLdsSizeByteSize(Util::Abi::HardwareStage::Gs, ldsSizeInDwords * 4);
setEsGsLdsSize(calcFactor.esGsLdsSize * 4);

unsigned maxVertOut = std::max(1u, static_cast<unsigned>(geometryMode.outputVertices));
SET_REG_FIELD(&config->esGsRegs, VGT_GS_MAX_VERT_OUT, MAX_VERT_OUT, maxVertOut);
Expand All @@ -1163,8 +1162,6 @@ void ConfigBuilder::buildEsGsRegConfig(ShaderStage shaderStage1, ShaderStage sha
SET_REG_FIELD(&config->esGsRegs, VGT_GS_MODE, ONCHIP, VGT_GS_MODE_ONCHIP_ON);
SET_REG_FIELD(&config->esGsRegs, VGT_GS_MODE, ES_WRITE_OPTIMIZE, false);
SET_REG_FIELD(&config->esGsRegs, VGT_GS_MODE, GS_WRITE_OPTIMIZE, false);

setEsGsLdsByteSize(calcFactor.esGsLdsSize * 4);
} else {
SET_REG_FIELD(&config->esGsRegs, VGT_GS_MODE, ONCHIP, VGT_GS_MODE_ONCHIP_OFF);
SET_REG_FIELD(&config->esGsRegs, VGT_GS_MODE, ES_WRITE_OPTIMIZE, false);
Expand Down Expand Up @@ -1366,7 +1363,6 @@ void ConfigBuilder::buildPrimShaderRegConfig(ShaderStage shaderStage1, ShaderSta
const unsigned ldsSize = ldsSizeInDwords >> ldsSizeDwordGranularityShift;
SET_REG_FIELD(&config->primShaderRegs, SPI_SHADER_PGM_RSRC2_GS, LDS_SIZE, ldsSize);
setLdsSizeByteSize(Util::Abi::HardwareStage::Gs, ldsSizeInDwords * 4);
setEsGsLdsSize(calcFactor.esGsLdsSize * 4);

if (m_gfxIp.major >= 11) {
// Pixel wait sync+
Expand Down
34 changes: 7 additions & 27 deletions lgc/patch/PatchCopyShader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,6 @@

#define DEBUG_TYPE "lgc-patch-copy-shader"

namespace llvm {
namespace cl {

extern opt<bool> InRegEsGsLdsSize;

} // namespace cl
} // namespace llvm

using namespace lgc;
using namespace llvm;

Expand Down Expand Up @@ -111,7 +103,6 @@ bool PatchCopyShader::runImpl(Module &module, PipelineShadersResult &pipelineSha
//
// void copyShader(
// i32 inreg globalTable,
// i32 inreg esGsLdsSize (GFX9+),
// i32 inreg streamOutTable (GFX9+),
// i32 inreg streamOutInfo,
// i32 inreg streamOutWriteIndex,
Expand All @@ -122,12 +113,11 @@ bool PatchCopyShader::runImpl(Module &module, PipelineShadersResult &pipelineSha
// i32 vertexOffset)
//

argTys = {int32Ty, int32Ty, int32Ty, int32Ty, int32Ty, int32Ty, int32Ty, int32Ty, int32Ty, int32Ty};
argTys = {int32Ty, int32Ty, int32Ty, int32Ty, int32Ty, int32Ty, int32Ty, int32Ty, int32Ty};

argInReg = {true, true, true, true, true, true, true, true, true, false};
argInReg = {true, true, true, true, true, true, true, true, false};
// clang-format off
argNames = {"globalTable",
"esGsLdsSize",
"streamOutTable",
"streamOutInfo",
"streamOutWriteIndex",
Expand All @@ -149,15 +139,15 @@ bool PatchCopyShader::runImpl(Module &module, PipelineShadersResult &pipelineSha
// GFX11+:
// void copyShader(
// i32 inreg globalTable,
// i32 vertexId)
// i32 vertexIndex)
if (m_pipelineState->getTargetInfo().getGfxIpVersion().major <= 10) {
argTys = {int32Ty};
argInReg = {false};
argNames = {"vertexId"};
argNames = {"vertexIndex"};
} else {
argTys = {int32Ty, int32Ty};
argInReg = {true, false};
argNames = {"globalTable", "vertexId"};
argNames = {"globalTable", "vertexIndex"};
}
}

Expand Down Expand Up @@ -201,12 +191,9 @@ bool PatchCopyShader::runImpl(Module &module, PipelineShadersResult &pipelineSha
auto intfData = m_pipelineState->getShaderInterfaceData(ShaderStageCopyShader);

if (!m_pipelineState->getNggControl()->enableNgg) {
// For GFX9+, streamOutTable SGPR index value should be greater than esGsLdsSize
intfData->userDataUsage.gs.copyShaderEsGsLdsSize = 1;
intfData->userDataUsage.gs.copyShaderStreamOutTable = 2;
intfData->userDataUsage.gs.copyShaderStreamOutTable = 1;
} else {
// If NGG, both esGsLdsSize and streamOutTable are not used
intfData->userDataUsage.gs.copyShaderEsGsLdsSize = InvalidValue;
// If NGG, streamOutTable is not used
intfData->userDataUsage.gs.copyShaderStreamOutTable = InvalidValue;
}

Expand All @@ -221,9 +208,6 @@ bool PatchCopyShader::runImpl(Module &module, PipelineShadersResult &pipelineSha
if (m_pipelineState->enableXfb())
userData[intfData->userDataUsage.gs.copyShaderStreamOutTable] =
static_cast<unsigned>(UserDataMapping::StreamOutTable);
if (cl::InRegEsGsLdsSize && m_pipelineState->isGsOnChip())
userData[intfData->userDataUsage.gs.copyShaderEsGsLdsSize] =
static_cast<unsigned>(UserDataMapping::EsGsLdsSize);
m_pipelineState->setUserDataMap(ShaderStageCopyShader, userData);
} else {
m_pipelineState->getPalMetadata()->setUserDataEntry(ShaderStageCopyShader, 0, UserDataMapping::GlobalTable);
Expand All @@ -232,10 +216,6 @@ bool PatchCopyShader::runImpl(Module &module, PipelineShadersResult &pipelineSha
intfData->userDataUsage.gs.copyShaderStreamOutTable,
UserDataMapping::StreamOutTable);
}
if (cl::InRegEsGsLdsSize && m_pipelineState->isGsOnChip()) {
m_pipelineState->getPalMetadata()->setUserDataEntry(
ShaderStageCopyShader, intfData->userDataUsage.gs.copyShaderEsGsLdsSize, UserDataMapping::EsGsLdsSize);
}
}
}

Expand Down
38 changes: 0 additions & 38 deletions lgc/patch/PatchEntryPointMutate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,16 +82,6 @@
using namespace llvm;
using namespace lgc;

namespace llvm {
namespace cl {

// -inreg-esgs-lds-size: Add a dummy "inreg" argument for ES-GS LDS size, this is to keep consistent with PAL's
// GS on-chip behavior. In the future, if PAL allows hardcoded ES-GS LDS size, this option could be deprecated.
opt<bool> InRegEsGsLdsSize("inreg-esgs-lds-size", desc("For GS on-chip, add esGsLdsSize in user data"), init(true));

} // namespace cl
} // namespace llvm

// =====================================================================================================================
PatchEntryPointMutate::PatchEntryPointMutate()
: m_hasTs(false), m_hasGs(false),
Expand Down Expand Up @@ -1706,34 +1696,6 @@ void PatchEntryPointMutate::addSpecialUserDataArgs(SmallVectorImpl<UserDataArg>
specialUserDataArgs.push_back(UserDataArg(builder.getInt32Ty(), "viewId", userDataValue, argIdx));
}

// NOTE: Add a dummy "inreg" argument for ES-GS LDS size, this is to keep consistent
// with PAL's GS on-chip behavior (VS is in NGG primitive shader).
bool wantEsGsLdsSize = false;
switch (getMergedShaderStage(m_shaderStage)) {
case ShaderStageVertex:
wantEsGsLdsSize = enableNgg;
break;
case ShaderStageTessControl:
wantEsGsLdsSize = false;
break;
case ShaderStageTessEval:
wantEsGsLdsSize = enableNgg;
break;
case ShaderStageGeometry:
wantEsGsLdsSize = (m_pipelineState->isGsOnChip() && cl::InRegEsGsLdsSize) || enableNgg;
break;
default:
llvm_unreachable("Unexpected shader stage");
}
if (wantEsGsLdsSize) {
auto userDataValue = UserDataMapping::EsGsLdsSize;
// For a standalone TCS (which can only happen in unit testing, not in a real pipeline), don't add
// the PAL metadata for it, for consistency with the old code.
if (m_shaderStage == ShaderStageVertex && !m_pipelineState->hasShaderStage(ShaderStageVertex))
userDataValue = UserDataMapping::Invalid;
specialUserDataArgs.push_back(UserDataArg(builder.getInt32Ty(), "esGsLdsSize", userDataValue));
}

if (getMergedShaderStage(m_shaderStage) == getMergedShaderStage(ShaderStageVertex)) {
// This is the VS, or the shader that VS is merged into on GFX9+.
auto vsIntfData = m_pipelineState->getShaderInterfaceData(ShaderStageVertex);
Expand Down
3 changes: 1 addition & 2 deletions lgc/patch/PatchResourceCollect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -727,8 +727,7 @@ bool PatchResourceCollect::checkGsOnChipValidity() {
gsResUsage->inOutUsage.gs.calcFactor.esVertsPerSubgroup = esVertsPerSubgroup;
gsResUsage->inOutUsage.gs.calcFactor.gsPrimsPerSubgroup = gsPrimsPerSubgroup;

// EsGsLdsSize is passed in a user data SGPR to the merged shader so that the API GS knows where to start
// reading out of LDS. EsGsLdsSize is unnecessary when there is no API GS.
// EsGsLdsSize is unnecessary when there is no API GS.
gsResUsage->inOutUsage.gs.calcFactor.esGsLdsSize = hasGs ? expectedEsLdsSize : 0;
gsResUsage->inOutUsage.gs.calcFactor.gsOnChipLdsSize = needsLds ? ldsSizeDwords : 0;

Expand Down
3 changes: 0 additions & 3 deletions lgc/patch/RegisterMetadataBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -402,7 +402,6 @@ void RegisterMetadataBuilder::buildEsGsRegisters() {

auto hwShaderNode = getHwShaderNode(Util::Abi::HardwareStage::Gs);
hwShaderNode[Util::Abi::HardwareStageMetadataKey::LdsSize] = calcLdsSize(ldsSizeInDwords);
setEsGsLdsSize(calcFactor.esGsLdsSize * 4);
}

// =====================================================================================================================
Expand Down Expand Up @@ -673,8 +672,6 @@ void RegisterMetadataBuilder::buildPrimShaderRegisters() {

auto hwShaderNode = getHwShaderNode(Util::Abi::HardwareStage::Gs);
hwShaderNode[Util::Abi::HardwareStageMetadataKey::LdsSize] = calcLdsSize(ldsSizeInDwords);
if (!m_hasMesh)
setEsGsLdsSize(calcFactor.esGsLdsSize * 4);
}

// =====================================================================================================================
Expand Down
2 changes: 0 additions & 2 deletions lgc/patch/ShaderInputs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,6 @@ const char *ShaderInputs::getSpecialUserDataName(UserDataMapping kind) {
return "DrawIndex";
case UserDataMapping::Workgroup:
return "Workgroup";
case UserDataMapping::EsGsLdsSize:
return "EsGsLdsSize";
case UserDataMapping::ViewId:
return "ViewId";
case UserDataMapping::StreamOutTable:
Expand Down
6 changes: 3 additions & 3 deletions llpc/test/shaderdb/gfx11/SgprUserDataInit_Fs.pipe
Original file line number Diff line number Diff line change
Expand Up @@ -302,12 +302,11 @@ colorBuffer[0].blendSrcAlphaToColor = 0
; CHECK-NEXT: .offchip_lds_en: false
; CHECK-NEXT: .scratch_en: false
; CHECK-NEXT: .scratch_memory_size: 0
; CHECK-NEXT: .sgpr_count: 0xc
; CHECK-NEXT: .sgpr_count: 0xb
; CHECK-NEXT: .sgpr_limit: 0x6a
; CHECK-NEXT: .trap_present: 0
; CHECK-NEXT: .user_data_reg_map:
; CHECK-NEXT: - 0x10000000
; CHECK-NEXT: - 0x1000000a
; CHECK-NEXT: - 0x10000003
; CHECK-NEXT: - 0x10000004
; CHECK-NEXT: - 0xffffffff
Expand Down Expand Up @@ -338,7 +337,8 @@ colorBuffer[0].blendSrcAlphaToColor = 0
; CHECK-NEXT: - 0xffffffff
; CHECK-NEXT: - 0xffffffff
; CHECK-NEXT: - 0xffffffff
; CHECK-NEXT: .user_sgprs: 0x4
; CHECK-NEXT: - 0xffffffff
; CHECK-NEXT: .user_sgprs: 0x3
; CHECK-NEXT: .vgpr_count: 0x9
; CHECK-NEXT: .vgpr_limit: 0x100
; CHECK-NEXT: .wavefront_size: 0x40
Expand Down

0 comments on commit abe957e

Please sign in to comment.