Skip to content

Commit

Permalink
Update pal from commit 89573f19
Browse files Browse the repository at this point in the history
* Add navi32 support
* Add new SQTT token mask setting
* Fix RGP setting clock modes in MGPU systems
* Faster cache layer hashing
* Minor tweaks for graphics pipeline shader library fast link
* Scissor in scaled copy compute
* Add logging of the GpuMemoryDesc struct for each creat…
* Allow for library with no funcs in LinkWithLibraries
* Read frontend/backend stack size from .shader_functions
* Add a 'default chip' for RGA offline compiles
* Queue: ensure the first element of internal submit info is initialized
* Change SQTT token mask setting to suppress instruction tokens
* Add a setting to control if issuing marker event is allowed for SQTT
* Handle ScissorRect before CopyImageCompute
* Enable 128BPP DCC fast clear support
* Fix hangs caused by command allocator auto-trim
* Optimize rpm blt active flags
* Fix negative 32-bit SPM counter values.
* Remove cp dma sync in nested postamble
* Respect elf alignment for memory allocation
* Fix SlowClearCompute path for color masked
* Set imageVaLocked=1 unconditionally for all RPM CTV and DSV create
* Look up color export shader symbol from shader_functions section
* Add guardband check in restore graphics state
* Bumps to C++20
* Prefer MSAA slow clears on graphics in some cases for performance
* Minor barrier fix for clear since PAL supports read-modify-write clears
* Minor barrier fix with CmdClearDepthStencil
* Patch SPI_SHADER_Z_FORMAT from color export shader
* Added forceShaderRingToVMem as a performance-tuning setting
  • Loading branch information
chuang13 authored and WenqingLiAMD committed Sep 29, 2023
1 parent 2abc775 commit 2de164b
Show file tree
Hide file tree
Showing 151 changed files with 35,228 additions and 32,179 deletions.
21 changes: 20 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,18 @@ include(PalCodegen)
include(PalOptions)
include(PalOverrides)

# Create PAL targets:
# - pal : The overall PAL target that a client links to
# - palUtil : Just the PAL util library
set(CMAKE_FOLDER "${CMAKE_FOLDER}/PAL Libs")
add_library(pal STATIC)
add_library(palUtil STATIC)
target_link_libraries(pal PUBLIC palUtil)

# - palCompilerDeps : Selected parts of PAL core that internal compiler depends on
add_library(palCompilerDeps STATIC)
target_link_libraries(palCompilerDeps PRIVATE palUtil)
target_link_libraries(pal PRIVATE palCompilerDeps)

add_subdirectory(cmake)
add_subdirectory(inc)
Expand All @@ -47,6 +57,15 @@ add_subdirectory(tools)

pal_compile_definitions(pal)
pal_compiler_options(pal)
pal_compile_definitions(palCompilerDeps)
pal_compiler_options(palCompilerDeps)
pal_compile_definitions(palUtil)
pal_compiler_options(palUtil)
pal_setup_generated_code()

nongen_source_groups(${PAL_SOURCE_DIR} pal)
nongen_source_groups(${PAL_SOURCE_DIR}
TARGETS
pal
palCompilerDeps
palUtil
)
1 change: 1 addition & 0 deletions cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,4 @@ target_sources(pal PRIVATE
PalOverrides.cmake
PalVersionHelper.cmake
)

15 changes: 15 additions & 0 deletions cmake/PalBuildParameters.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,15 @@ if (PAL_BUILD_GFX9)
)
#endif

#if PAL_BUILD_NAVI32
pal_bp( PAL_BUILD_NAVI32 ON MODE "AUTHOR_WARNING"
ASIC_CONFIG
PAL_BUILD_GFX11
PAL_BUILD_NAVI3X
CHIP_HDR_NAVI32
)
#endif

#if PAL_BUILD_NAVI33
pal_bp( PAL_BUILD_NAVI33 ON MODE "AUTHOR_WARNING"
ASIC_CONFIG
Expand All @@ -183,3 +192,9 @@ if (PAL_BUILD_GFX9)

endif() # PAL_BUILD_GFX9

#if PAL_BUILD_RDF
pal_bp(PAL_BUILD_RDF ON)
#endif

# "This must always be enabled unless the client guarantees they do not use GFX/3D queues"
pal_bp(PAL_BUILD_RPM_GFX_SHADERS ON)
38 changes: 25 additions & 13 deletions cmake/PalCodegen.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -206,20 +206,32 @@ function(pal_setup_generated_code)
endif()
endfunction()

function(nongen_source_groups DIR TGT)
function(nongen_source_groups DIR)
# All generated files should have an explicit source_group where they are generated.

get_target_property(_sources ${TGT} SOURCES)
set(_nongen_sources "")
foreach(SOURCE ${_sources})
get_source_file_property(_isgen "${SOURCE}" GENERATED)
if (NOT _isgen)
list(APPEND _nongen_sources "${SOURCE}")
endif()
endforeach()
set(singleValArgs TARGET)
set(multiValArgs TARGETS)
cmake_parse_arguments(PARSE_ARGV 1 SETGEN "" "${singleValArgs}" "${multiValArgs}")

source_group(
TREE ${DIR}/
FILES ${_nongen_sources}
)
if (DEFINED SETGEN_TARGET AND DEFINED SETGEN_TARGETS)
message(FATAL_ERROR "TARGET and TARGETS cannot both be defined at the same time!")
elseif (DEFINED SETGEN_TARGET)
list(APPEND SETGEN_TARGETS ${SETGEN_TARGET})
endif()

foreach(TGT ${SETGEN_TARGETS})
get_target_property(_sources ${TGT} SOURCES)
set(_nongen_sources "")
foreach(SOURCE ${_sources})
get_source_file_property(_isgen "${SOURCE}" GENERATED)
if (NOT _isgen)
list(APPEND _nongen_sources "${SOURCE}")
endif()
endforeach()

source_group(
TREE ${DIR}/
FILES ${_nongen_sources}
)
endforeach()
endfunction()
9 changes: 9 additions & 0 deletions cmake/PalCompileDefinitions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,11 @@ function(pal_compile_definitions_gpu TARGET)
target_compile_definitions(${TARGET} PRIVATE CHIP_HDR_NAVI31=$<BOOL:${CHIP_HDR_NAVI31}>)
#endif

#if PAL_BUILD_NAVI32
target_compile_definitions(${TARGET} PUBLIC PAL_BUILD_NAVI32=$<BOOL:${PAL_BUILD_NAVI32}>)
target_compile_definitions(${TARGET} PRIVATE CHIP_HDR_NAVI32=$<BOOL:${CHIP_HDR_NAVI32}>)
#endif

#if PAL_BUILD_NAVI33
target_compile_definitions(${TARGET} PUBLIC PAL_BUILD_NAVI33=$<BOOL:${PAL_BUILD_NAVI33}>)
target_compile_definitions(${TARGET} PRIVATE CHIP_HDR_NAVI33=$<BOOL:${CHIP_HDR_NAVI33}>)
Expand Down Expand Up @@ -214,6 +219,10 @@ function(pal_compile_definitions TARGET)
target_compile_definitions(${TARGET} PRIVATE PAL_BUILD_OSS4=$<BOOL:${PAL_BUILD_OSS4}>)
endif()

#if PAL_BUILD_RPM_GFX_SHADERS
target_compile_definitions(${TARGET} PUBLIC PAL_BUILD_RPM_GFX_SHADERS=$<BOOL:${PAL_BUILD_RPM_GFX_SHADERS}>)
#endif

target_compile_definitions(${TARGET} PUBLIC PAL_64BIT_ARCHIVE_FILE_FMT=$<BOOL:${PAL_64BIT_ARCHIVE_FILE_FMT}>)

pal_compile_definitions_gpu(${TARGET})
Expand Down
4 changes: 2 additions & 2 deletions cmake/PalCompilerOptions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,13 @@ include(CheckCXXCompilerFlag)

function(pal_compiler_options TARGET)
set_target_properties(${TARGET} PROPERTIES
CXX_STANDARD 17
CXX_STANDARD 20
CXX_STANDARD_REQUIRED ON
CXX_EXTENSIONS OFF
POSITION_INDEPENDENT_CODE TRUE
)

target_compile_features(${TARGET} PUBLIC cxx_std_17)
target_compile_features(${TARGET} PUBLIC cxx_std_20)

set(isGNU FALSE)
set(isClang FALSE)
Expand Down
9 changes: 9 additions & 0 deletions cmake/PalOverrides.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ set(ADDR_SI_CHIP_DIR "${PROJECT_SOURCE_DIR}/src/core/hw/gfxip/gfx6/chip")

# GPU Overrides

if (PAL_BUILD_GFX6)
endif()

if(PAL_BUILD_GFX9)
# Generic support for GFX9 cards
set(ADDR_GFX9_BUILD ON)
Expand Down Expand Up @@ -79,6 +82,12 @@ if(PAL_BUILD_GFX9)
set(PAL_SWD_BUILD_NAVI3X ${PAL_BUILD_NAVI3X})
#endif

#if PAL_BUILD_NAVI32
set(ADDR_NAVI32_BUILD ${PAL_BUILD_NAVI32})
set(PAL_SWD_BUILD_NAVI32 ${PAL_BUILD_NAVI32})
set(PAL_SWD_BUILD_NAVI3X ${PAL_BUILD_NAVI3X})
#endif

#if PAL_BUILD_NAVI33
set(ADDR_NAVI33_BUILD ${PAL_BUILD_NAVI33})
set(PAL_SWD_BUILD_NAVI33 ${PAL_BUILD_NAVI33})
Expand Down
42 changes: 24 additions & 18 deletions doc/process/palCodingStandards.md
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ General Language Restrictions
or memory allocation calls failing. The `PAL_ALERT` macro ***should
be*** used for reporting this sort of failure, when deemed useful.

- All target compilers **must** fully support C++17. The following C++
- All target compilers **must** fully support C++20. The following C++
constructs are explicitly allowed:

- Storage class ***must*** be specified for all enums to allow
Expand Down Expand Up @@ -638,24 +638,33 @@ uint32* pFlags;
- Local variables ***should*** be initialized at their declaration
where possible.

- Structures ***should*** be initialized completely. The preferred
method is to use "= { };" which initializes the entire structure
to 0 (the C / C++ specification states that members not explicitly
initialized are set to 0) in a concise way which is also highly
visible to the optimizer.
- Structures ***should*** be initialized completely. The preferred method is to use _aggregate initialization_ which initializes the entire structure in the following order:
1. If there's a designated initializer for a value, use that.
2. If the struct's declaration specifies a default value, use that.
3. Otherwise, value-initialize the member: call the type's default constructor, or for primitives initialize them to 0 (`nullptr` for pointers).

```cpp
SomeStruct structData = { };
struct SomeStruct
{
int x;
int y{ 1 };
SomeStruct* pNext;
};

// { 0, 1, nullptr }
SomeStruct someData{};

// { 9, 1, &someData }
SomeStruct moreData{ .x = 9, .pNext{ &someData } };
```
- Some structures may not allow the "= { }" form (e.g. if an enum is
present) and these ***should*** use memset instead.
- Some structures may not allow the "{}" form (e.g. if a default constructor is private or deleted) and these ***should*** use `std::memset()` instead.
- If a structure is not completely initialized (e.g. in extremely
performance-critical code) a comment ***must*** be added to
document the reason for not doing so.
- const ***should*** be used wherever possible; see "Const Usage".
- const ***should*** be used wherever possible; see ["Const Usage"](#const-usage).
- Local variables ***must*** be initialized via assignment
(`uint32 foo = 2;`) rather than construction (`uint32 foo(2);`).
Expand Down Expand Up @@ -1369,11 +1378,7 @@ case Blah2:
}
```

- Any time a case statement is used without a corresponding break by
design, a comment ***must*** record why the break is not needed.
The exception to this case is when a group of cases all execute
the same code. The example below shows where a comment is and is
not required.
- Fallthrough behavior: Any time a case statement is used without a corresponding break by design, the `[[fallthrough]]` attribute ***must*** be used. The only exception to this is if the case is empty. A comment explaining why we are intentionally falling through is highly encouraged.

```cpp
switch (operation)
Expand All @@ -1390,11 +1395,12 @@ case OpPauseSignaled:
break;
case OpYield:
RecordYields();
// FALLTHROUGH: After the record, the Yield operation then needs
// After the record, the Yield operation then needs
// to carry out the default operation.
[[fallthrough]];
default:
// Handle the default, unspecified cases. The default case is necessary
// because...
// Handle the default, unspecified cases.
// The default case is necessary because...
break;
}
```
Expand Down
1 change: 1 addition & 0 deletions inc/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#######################################################################################################################

target_include_directories(pal PUBLIC .)
target_include_directories(palCompilerDeps PUBLIC .)

target_sources(pal PRIVATE
CMakeLists.txt
Expand Down
5 changes: 4 additions & 1 deletion inc/core/g_palPipelineAbiMetadata.h
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ struct HardwareStageMetadata
uint32 vgprLimit;
/// SGPR count upper limit (only set if different from HW default).
uint32 sgprLimit;

/// Thread-group X/Y/Z dimensions (Compute only).
uint32 threadgroupDimensions[3];
/// Original thread-group X/Y/Z dimensions (Compute only).
Expand Down Expand Up @@ -200,6 +201,7 @@ struct HardwareStageMetadata
uint64 sgprCount : 1;
uint64 vgprLimit : 1;
uint64 sgprLimit : 1;
uint64 placeholder0 : 1;
uint64 threadgroupDimensions : 1;
uint64 origThreadgroupDimensions : 1;
uint64 cbConstUsage : 1;
Expand Down Expand Up @@ -227,7 +229,7 @@ struct HardwareStageMetadata
uint64 writesDepth : 1;
uint64 usesAppendConsume : 1;
uint64 usesPrimId : 1;
uint64 reserved : 27;
uint64 reserved : 26;
};
uint64 uAll;
} hasEntry;
Expand Down Expand Up @@ -2479,6 +2481,7 @@ namespace HardwareStageMetadataKey
static constexpr char SgprCount[] = ".sgpr_count";
static constexpr char VgprLimit[] = ".vgpr_limit";
static constexpr char SgprLimit[] = ".sgpr_limit";

static constexpr char ThreadgroupDimensions[] = ".threadgroup_dimensions";
static constexpr char OrigThreadgroupDimensions[] = ".orig_threadgroup_dimensions";
static constexpr char CbConstUsages[] = ".cb_const_usages";
Expand Down
7 changes: 7 additions & 0 deletions inc/core/pal.h
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,13 @@ struct CommonShaderStats
} flags; ///< Shader compilation stat flags.
};

/// Per-thread stack sizes
struct CompilerStackSizes
{
uint32 backendSize; ///< Managed by compiler backend
uint32 frontendSize; ///< Managed by compiler frontend
};

///@{
/// Determines whether two ShaderHashes or PipelineHashes are equal.
///
Expand Down
3 changes: 2 additions & 1 deletion inc/core/palCacheLayer.h
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,8 @@ struct TrackingCacheCreateInfo

using TrackedHashSet = HashSet<
Hash128,
ForwardAllocator>;
ForwardAllocator,
MetroHash::HashFunc>;

using TrackedHashIter = TrackedHashSet::Iterator;

Expand Down
8 changes: 3 additions & 5 deletions inc/core/palCmdAllocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,7 @@ struct CmdAllocatorCreateInfo
/// Must be greater than zero even if the client doesn't plan on using this
/// allocation type.
uint32 allocFreeThreshold; ///< Minimum count of free allocations that the allocator should keep around
/// for fast reuse. It is either used when the autoTrimMemory flag is set
/// or the Trim() function is called explicitly.
/// for fast reuse. It is used when the autoTrimMemory flag is set.
} allocInfo[CmdAllocatorTypeCount]; ///< Information for each allocation type.
};

Expand Down Expand Up @@ -154,9 +153,8 @@ class ICmdAllocator : public IDestroyable
/// @param [in] allocTypeMask Gives control whether trimming will be applied for each CmdAllocType.
/// Use (1 << CmdAllocatorTypeCount) - 1 to apply trimming to all types.
/// When trimming only the embedded data, use (1 << EmbeddedDataAlloc).
/// @param [in] dynamicThreshold Allows to use a higher trim threshold than given for at the CmdAllocator
/// initialization. The maximum of this argument and the type specific static
/// threshold will be used (thus keeping more allocations around).
/// @param [in] dynamicThreshold Minimum count of free allocations that the allocator should keep around

virtual Result Trim(uint32 allocTypeMask, uint32 dynamicThreshold) = 0;

/// Query the numbers of allocations and chunks of the given CmdAllocator type.
Expand Down
16 changes: 11 additions & 5 deletions inc/core/palCmdBuffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -1964,7 +1964,11 @@ struct CmdBufInfo
#endif
uint64 frameIndex; ///< The frame index of this command buffer. It is only required for the
/// DirectCapture feature

#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 822
uint32 vidPnSourceId; ///< The display source id for the DirectCapture feature. Clients must set
/// a valid vidPnSourceId when privateFlip flag is set and pDirectCapMemory
/// is nullptr.
#endif
};

/// Specifies rotation angle between two images. Used as input to ICmdBuffer::CmdScaledCopyImage.
Expand Down Expand Up @@ -3467,9 +3471,9 @@ class ICmdBuffer : public IDestroyable
/// In practice, this is the case when vkCmdClearColorAttachments() is called in a secondary command buffer in
/// Vulkan where the color attachments are inherited.
///
/// This requires regionCount being specified since resource size is for sure to be known.
///
/// The bound color targets shouldn't have UndefinedSwizzledFormat as their swizzle format.
/// This requires regionCount being specified since resource size is for sure to be known. The bound color targets
/// shouldn't have UndefinedSwizzledFormat as their swizzle format. When issuing a barrier for cleared color
/// targets, PipelineStageColorTarget and CoherColorTarget should be used instead of PipelineStageBlt and
/// CoherClear.
///
/// @param [in] colorTargetCount Number of bound color target that needs to be cleared.
/// @param [in] pBoundColorTargets Color target information for the bound color targets.
Expand Down Expand Up @@ -3532,7 +3536,9 @@ class ICmdBuffer : public IDestroyable
/// In practice, this is the case when vkCmdClearColorAttachments() is called in a secondary command buffer in
/// Vulkan where the color attachments are inherited.
///
/// This requires regionCount being specified since resource size is for sure to be known.
/// This requires regionCount being specified since resource size is for sure to be known. When issuing a barrier
/// for cleared depth stencil targets, PipelineStageEarlyDsTarget/PipelineStageLateDsTarget and
/// CoherDepthStencilTarget should be used instead of PipelineStageBlt and CoherClear.
///
/// @param [in] depth Depth clear value.
/// @param [in] stencil Stencil clear value.
Expand Down
Loading

0 comments on commit 2de164b

Please sign in to comment.