Skip to content

Commit

Permalink
ROCm 3.1.0 updates
Browse files Browse the repository at this point in the history
  • Loading branch information
skeelyamd committed Feb 27, 2020
1 parent cd47351 commit 56cfa04
Show file tree
Hide file tree
Showing 10 changed files with 116 additions and 13 deletions.
9 changes: 7 additions & 2 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,12 @@ endif()
get_version ( "1.1.9" )
set (SO_MAJOR 1)
set (SO_MINOR 1)
set (SO_PATCH 9)
## Choose the patch component of the version numbers. When ROCM_PATCH_VERSION
## evaluates true it replaces the patch level of both the shared-object version
## (SO_PATCH) and the package version (VERSION_PATCH); otherwise the
## shared-object patch level is 9 and VERSION_PATCH is left untouched.
## NOTE(review): the variable is dereferenced inside if(), so a value CMake
## treats as false (for example 0 or an empty string) also selects the
## fallback branch - confirm this is intended.
if ( ${ROCM_PATCH_VERSION} )
set ( SO_PATCH ${ROCM_PATCH_VERSION})
set ( VERSION_PATCH ${ROCM_PATCH_VERSION})
else ()
set(SO_PATCH 9)
endif ()

set ( SO_VERSION_STRING "${SO_MAJOR}.${SO_MINOR}.${SO_PATCH}" )
set ( PACKAGE_VERSION_STRING "${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH}.${VERSION_COMMIT_COUNT}-${VERSION_JOB}-${VERSION_HASH}" )
Expand All @@ -106,7 +111,7 @@ include_directories ( ${CMAKE_CURRENT_SOURCE_DIR}/libamdhsacode )
add_definitions ( -DROCR_BUILD_ID=${PACKAGE_VERSION_STRING} )

## Set RUNPATH - ../../lib covers use of the legacy symlink in /hsa/lib/
set(CMAKE_INSTALL_RPATH "$ORIGIN;$ORIGIN/../../lib")
set(CMAKE_INSTALL_RPATH "$ORIGIN;$ORIGIN/../../lib;$ORIGIN/../../lib64;$ORIGIN/../lib64")

## ------------------------- Linux Compiler and Linker options -------------------------
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror -fexceptions -fno-rtti -fvisibility=hidden -Wno-error=sign-compare -Wno-sign-compare -Wno-write-strings -Wno-conversion-null -fno-math-errno -fno-threadsafe-statics -fmerge-all-constants -fms-extensions -Wno-error=comment -Wno-comment -Wno-error=pointer-arith -Wno-pointer-arith -Wno-error=unused-variable -Wno-error=unused-function" )
Expand Down
3 changes: 3 additions & 0 deletions src/core/inc/amd_aql_queue.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,9 @@ class AqlQueue : public core::Queue, private core::LocalSignal, public core::Doo
/// @brief Change the scheduling priority of the queue
hsa_status_t SetPriority(HSA_QUEUE_PRIORITY priority) override;

/// @brief Destroy ref counted queue
void Destroy() override;

/// @brief Atomically reads the Read index with Acquire semantics
///
/// @return uint64_t Value of read index
Expand Down
13 changes: 13 additions & 0 deletions src/core/inc/amd_gpu_agent.h
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,9 @@ class GpuAgent : public GpuAgentInt {
uint32_t group_segment_size,
core::Queue** queue) override;

// @brief Decrement GWS ref count.
void GWSRelease();

// @brief Override from amd::GpuAgentInt.
void AcquireQueueScratch(ScratchInfo& scratch) override;

Expand Down Expand Up @@ -488,6 +491,9 @@ class GpuAgent : public GpuAgentInt {
// @brief Create internal queues and blits.
void InitDma();

// @brief Setup GWS accessing queue.
void InitGWS();


// Bind index of peer device that is connected via xGMI links
lazy_ptr<core::Blit>& GetXgmiBlit(const core::Agent& peer_agent);
Expand All @@ -504,6 +510,13 @@ class GpuAgent : public GpuAgentInt {
// @brief Alternative aperture size. Only on KV.
size_t ape1_size_;

// @brief Queue with GWS access.
struct {
  // Lazily constructed queue with GWS access. Cooperative queue requests are
  // served from this single instance.
  lazy_ptr<core::Queue> queue_;
  // Number of outstanding cooperative queue handles. Initialized here so the
  // count is well defined even before the lazy queue factory has run.
  int ref_ct_ = 0;
  // Held while ref_ct_ is modified and while the queue is handed out or
  // re-armed.
  KernelMutex lock_;
} gws_queue_;

DISALLOW_COPY_AND_ASSIGN(GpuAgent);
};

Expand Down
2 changes: 2 additions & 0 deletions src/core/inc/queue.h
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,8 @@ class Queue : public Checked<0xFA3906A679F9DB49>, private LocalQueue {

virtual ~Queue() {}

/// @brief Releases the queue. The default implementation deletes the
/// object; derived queue types may override this to release differently.
virtual void Destroy() {
  delete this;
}

/// @brief Returns the handle of Queue's public data type
///
/// @param queue Pointer to an instance of Queue implementation object
Expand Down
8 changes: 8 additions & 0 deletions src/core/runtime/amd_aql_queue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,14 @@ AqlQueue::~AqlQueue() {
core::Runtime::runtime_singleton_->system_deallocator()(pm4_ib_buf_);
}

/// @brief Releases this queue.
///
/// A queue whose type has the HSA_QUEUE_TYPE_COOPERATIVE bit set is not
/// deleted here; the release is forwarded to the owning agent's GWSRelease().
/// Every other queue is deleted immediately.
void AqlQueue::Destroy() {
  const bool is_cooperative = (amd_queue_.hsa_queue.type & HSA_QUEUE_TYPE_COOPERATIVE) != 0;
  if (!is_cooperative) {
    delete this;
    return;
  }

  // No members are touched after this call returns.
  agent_->GWSRelease();
}

uint64_t AqlQueue::LoadReadIndexAcquire() {
return atomic::Load(&amd_queue_.read_dispatch_id, std::memory_order_acquire);
}
Expand Down
46 changes: 45 additions & 1 deletion src/core/runtime/amd_gpu_agent.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -533,7 +533,7 @@ void GpuAgent::InitDma() {
auto blit_lambda = [this](bool use_xgmi, lazy_ptr<core::Queue>& queue) {
const std::string& sdma_override = core::Runtime::runtime_singleton_->flag().enable_sdma();

bool use_sdma = (isa_->GetMajorVersion() != 8);
bool use_sdma = ((isa_->GetMajorVersion() != 8) && (isa_->GetMajorVersion() != 10));
if (sdma_override.size() != 0) use_sdma = (sdma_override == "1");

if (use_sdma && (HSA_PROFILE_BASE == profile_)) {
Expand Down Expand Up @@ -580,6 +580,35 @@ void GpuAgent::InitDma() {
for (uint32_t idx = DefaultBlitCount; idx < blit_cnt_; idx++) {
blits_[idx].reset([blit_lambda, this]() { return blit_lambda(true, queues_[QueueUtility]); });
}

// GWS queues.
InitGWS();
}

void GpuAgent::InitGWS() {
gws_queue_.queue_.reset([this]() {
if (properties_.NumGws == 0) return (core::Queue*)nullptr;
std::unique_ptr<core::Queue> queue(CreateInterceptibleQueue());
if (queue == nullptr)
throw AMD::hsa_exception(HSA_STATUS_ERROR_OUT_OF_RESOURCES,
"Internal queue creation failed.");

uint32_t discard;
auto status = hsaKmtAllocQueueGWS(queue->amd_queue_.hsa_queue.id, 1, &discard);
if (status != HSAKMT_STATUS_SUCCESS)
throw AMD::hsa_exception(HSA_STATUS_ERROR_OUT_OF_RESOURCES, "GWS allocation failed.");

queue->amd_queue_.hsa_queue.type = HSA_QUEUE_TYPE_COOPERATIVE | HSA_QUEUE_TYPE_MULTI;
gws_queue_.ref_ct_ = 0;
return queue.release();
});
}

void GpuAgent::GWSRelease() {
ScopedAcquire<KernelMutex> lock(&gws_queue_.lock_);
gws_queue_.ref_ct_--;
if (gws_queue_.ref_ct_ != 0) return;
InitGWS();
}

void GpuAgent::PreloadBlits() {
Expand Down Expand Up @@ -869,6 +898,9 @@ hsa_status_t GpuAgent::GetInfo(hsa_agent_info_t attribute, void* value) const {
case HSA_AMD_AGENT_INFO_DOMAIN:
*((uint32_t*)value) = static_cast<uint32_t>(properties_.Domain);
break;
case HSA_AMD_AGENT_INFO_COOPERATIVE_QUEUES:
*((bool*)value) = properties_.NumGws != 0;
break;
default:
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
break;
Expand All @@ -881,6 +913,18 @@ hsa_status_t GpuAgent::QueueCreate(size_t size, hsa_queue_type32_t queue_type,
void* data, uint32_t private_segment_size,
uint32_t group_segment_size,
core::Queue** queue) {
// Handle GWS queues.
if (queue_type & HSA_QUEUE_TYPE_COOPERATIVE) {
ScopedAcquire<KernelMutex> lock(&gws_queue_.lock_);
auto ret = (*gws_queue_.queue_).get();
if (ret != nullptr) {
gws_queue_.ref_ct_++;
*queue = ret;
return HSA_STATUS_SUCCESS;
}
return HSA_STATUS_ERROR_INVALID_QUEUE_CREATION;
}

// AQL queues must be a power of two in length.
if (!IsPowerOfTwo(size)) {
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
Expand Down
13 changes: 9 additions & 4 deletions src/core/runtime/hsa.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -687,8 +687,8 @@ hsa_status_t hsa_queue_create(
TRY;
IS_OPEN();

if ((queue == nullptr) || (size == 0) || (!IsPowerOfTwo(size)) || (type < HSA_QUEUE_TYPE_MULTI) ||
(type > HSA_QUEUE_TYPE_SINGLE)) {
if ((queue == nullptr) || (size == 0) || (!IsPowerOfTwo(size)) ||
(type > HSA_QUEUE_TYPE_COOPERATIVE)) {
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
}

Expand All @@ -701,7 +701,12 @@ hsa_status_t hsa_queue_create(
assert(HSA_STATUS_SUCCESS == status);

if (agent_queue_type == HSA_QUEUE_TYPE_SINGLE &&
type != HSA_QUEUE_TYPE_SINGLE) {
((type & HSA_QUEUE_TYPE_SINGLE) != HSA_QUEUE_TYPE_SINGLE)) {
return HSA_STATUS_ERROR_INVALID_QUEUE_CREATION;
}

if ((type & HSA_QUEUE_TYPE_COOPERATIVE) &&
((type & HSA_QUEUE_TYPE_SINGLE) != HSA_QUEUE_TYPE_MULTI)) {
return HSA_STATUS_ERROR_INVALID_QUEUE_CREATION;
}

Expand Down Expand Up @@ -758,7 +763,7 @@ hsa_status_t hsa_queue_destroy(hsa_queue_t* queue) {
IS_BAD_PTR(queue);
core::Queue* cmd_queue = core::Queue::Convert(queue);
IS_VALID(cmd_queue);
delete cmd_queue;
cmd_queue->Destroy();
return HSA_STATUS_SUCCESS;
CATCH;
}
Expand Down
20 changes: 16 additions & 4 deletions src/inc/hsa.h
Original file line number Diff line number Diff line change
Expand Up @@ -2193,7 +2193,19 @@ typedef enum {
* that support a single producer may be more efficient than queues supporting
* multiple producers.
*/
HSA_QUEUE_TYPE_SINGLE = 1
HSA_QUEUE_TYPE_SINGLE = 1,
/**
* Queue supports cooperative dispatches able to use GWS synchronization.
* Queues of this type must also be of type HSA_QUEUE_TYPE_MULTI and
* may be limited in number. The runtime may return the same queue to serve
* multiple hsa_queue_create calls when this type is given. Callers must
* inspect the returned queue to discover queue size. Queues of this type
* are reference counted and require a matching number of hsa_queue_destroy
* calls to release. Use of multiproducer queue mechanics is required. See
* ::HSA_AMD_AGENT_INFO_COOPERATIVE_QUEUES to query agent support for this
* type.
*/
HSA_QUEUE_TYPE_COOPERATIVE = 2
} hsa_queue_type_t;

/**
Expand Down Expand Up @@ -2300,9 +2312,9 @@ typedef struct hsa_queue_s {
* created queue is the maximum of @p size and the value of
* ::HSA_AGENT_INFO_QUEUE_MIN_SIZE in @p agent.
*
* @param[in] type Type of the queue. If the value of
* ::HSA_AGENT_INFO_QUEUE_TYPE in @p agent is ::HSA_QUEUE_TYPE_SINGLE, then @p
* type must also be ::HSA_QUEUE_TYPE_SINGLE.
* @param[in] type Type of the queue, a bitwise OR of hsa_queue_type_t values.
* If the value of ::HSA_AGENT_INFO_QUEUE_TYPE in @p agent is ::HSA_QUEUE_TYPE_SINGLE,
* then @p type must also be ::HSA_QUEUE_TYPE_SINGLE.
*
* @param[in] callback Callback invoked by the HSA runtime for every
* asynchronous event related to the newly created queue. May be NULL. The HSA
Expand Down
7 changes: 6 additions & 1 deletion src/inc/hsa_ext_amd.h
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,12 @@ typedef enum hsa_amd_agent_info_s {
* to give the full physical location of the Agent.
* The type of this attribute is uint32_t.
*/
HSA_AMD_AGENT_INFO_DOMAIN = 0xA00F
HSA_AMD_AGENT_INFO_DOMAIN = 0xA00F,
/**
* Queries for support of cooperative queues. See ::HSA_QUEUE_TYPE_COOPERATIVE.
* The type of this attribute is bool.
*/
HSA_AMD_AGENT_INFO_COOPERATIVE_QUEUES = 0xA010
} hsa_amd_agent_info_t;

typedef struct hsa_amd_hdp_flush_s {
Expand Down
8 changes: 7 additions & 1 deletion src/loader/executable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,13 @@
using namespace amd::hsa;
using namespace amd::hsa::common;

static void __attribute__((noinline, optimize(0))) _loader_debug_state() {};
// NONOPTIMIZE marks a function as never inlined and never optimized. Clang
// spells the second attribute optnone; other compilers get optimize(0).
#if !defined(__clang__)
#define NONOPTIMIZE __attribute__((noinline, optimize(0)))
#else
#define NONOPTIMIZE __attribute__((noinline, optnone))
#endif

// Intentionally empty function; its address is stored in _amdgpu_r_debug.
// NOTE(review): presumably a hook for a debugger to break on - confirm.
NONOPTIMIZE static void _loader_debug_state() {}
r_debug _amdgpu_r_debug __attribute__((visibility("default"))) = {1,
nullptr,
reinterpret_cast<uintptr_t>(&_loader_debug_state),
Expand Down

0 comments on commit 56cfa04

Please sign in to comment.