Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ROCP_SDK: Enabling agent profiling mode. #249

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 51 additions & 77 deletions src/components/rocp_sdk/sdk_class.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "sdk_class.hpp"
#include <stdio.h>

#define AGENT_PROFILE_MODE
#define ROCPROF_SDK_BUG_WORKAROUND

namespace papi_rocpsdk
Expand Down Expand Up @@ -30,8 +31,12 @@ struct event_instance_info_t{
std::atomic<unsigned int> _global_papi_event_count{0};
std::atomic<unsigned int> _base_event_count{0};
// Guards the per-agent profile cache: the callbacks that look up a profile
// take a shared lock, while set_profile_cache() takes a unique lock while it
// clears and rebuilds the cache.
static std::shared_mutex profile_cache_mutex = {};
// Held by buffered_callback() while it accumulates counter values and sets
// data_is_ready; also the mutex read_sample() waits on with agent_cond_var.
static std::mutex agent_mutex = {};
// Notified by buffered_callback() after the counter values have been
// accumulated; read_sample() blocks on it after flushing the buffer.
static std::condition_variable agent_cond_var = {};
// Wait predicate for agent_cond_var: set to true by buffered_callback() and
// reset to false by read_sample() before it flushes the buffer.
// NOTE(review): read_sample() resets this flag without holding agent_mutex --
// confirm that no callback can run concurrently with that write.
static bool data_is_ready = false;
static std::string _rocp_sdk_error_string;
// Points at the counter array of the context passed to start_counting()
// (ctx->counters), so that functions invoked by the ROCprofiler-SDK rather
// than through our API can still find the array.
static long long int *_counter_values = NULL;
// Value returned by get_profiling_mode(). setup() resets it to
// RPSDK_MODE_CALLBACK_DISPATCH and switches it to RPSDK_MODE_AGENT_PROFILE
// when the environment variable RPSDK_MODE_AGENT_PROFILE is set.
static int rpsdk_profiling_mode = RPSDK_MODE_CALLBACK_DISPATCH;

agent_map_t gpu_agents = agent_map_t{};

Expand All @@ -40,12 +45,14 @@ std::unordered_map<std::string, base_event_info_t> base_events_by_name = {};
std::set<int> active_device_set = {};
vendorp_ctx_t active_event_set_ctx;

std::unordered_map<uint64_t, rocprofiler_profile_config_id_t> dispatch_profile_cache = {};
std::unordered_map<uint64_t, rocprofiler_profile_config_id_t> rpsdk_profile_cache = {};
std::unordered_map<unsigned int, event_instance_info_t> papi_id_to_event_instance = {};
std::unordered_map<std::string, unsigned int> event_instance_name_to_papi_id = {};

/* *** */

typedef rocprofiler_status_t (* rocprofiler_flush_buffer_t) (rocprofiler_buffer_id_t buffer_id);

typedef rocprofiler_status_t (* rocprofiler_sample_agent_profile_counting_service_t) (rocprofiler_context_id_t context_id, rocprofiler_user_data_t user_data, rocprofiler_counter_flag_t flags);

typedef rocprofiler_status_t (* rocprofiler_configure_callback_dispatch_profile_counting_service_t) (rocprofiler_context_id_t context_id, rocprofiler_profile_counting_dispatch_callback_t dispatch_callback, void *dispatch_callback_args, rocprofiler_profile_counting_record_callback_t record_callback, void *record_callback_args);
Expand Down Expand Up @@ -88,6 +95,7 @@ typedef rocprofiler_status_t (* rocprofiler_query_record_counter_id_t) (rocprofi

typedef rocprofiler_status_t (* rocprofiler_query_record_dimension_position_t) (rocprofiler_counter_instance_id_t id, rocprofiler_counter_dimension_id_t dim, unsigned long *pos);

rocprofiler_flush_buffer_t rocprofiler_flush_buffer_FPTR;
rocprofiler_sample_agent_profile_counting_service_t rocprofiler_sample_agent_profile_counting_service_FPTR;
rocprofiler_configure_callback_dispatch_profile_counting_service_t rocprofiler_configure_callback_dispatch_profile_counting_service_FPTR;
rocprofiler_configure_agent_profile_counting_service_t rocprofiler_configure_agent_profile_counting_service_FPTR;
Expand Down Expand Up @@ -132,21 +140,17 @@ get_error_string()
}

int
get_profiling_mode(){
#if defined(AGENT_PROFILE_MODE)
// Warning: RPSDK_MODE_AGENT_PROFILE mode does not work properly yet, due to rocprofiler-sdk bugs.
static int profiling_mode = RPSDK_MODE_AGENT_PROFILE;
#else
static int profiling_mode = RPSDK_MODE_CALLBACK_DISPATCH;
#endif
return profiling_mode;
get_profiling_mode(void)
{
return rpsdk_profiling_mode;
}

/* ** */
static char *
obtain_function_pointers(void *dllHandle)
{

DLL_SYM_CHECK(rocprofiler_flush_buffer, rocprofiler_flush_buffer_t);
DLL_SYM_CHECK(rocprofiler_sample_agent_profile_counting_service, rocprofiler_sample_agent_profile_counting_service_t);
DLL_SYM_CHECK(rocprofiler_configure_callback_dispatch_profile_counting_service, rocprofiler_configure_callback_dispatch_profile_counting_service_t);
DLL_SYM_CHECK(rocprofiler_configure_agent_profile_counting_service, rocprofiler_configure_agent_profile_counting_service_t);
Expand Down Expand Up @@ -337,8 +341,8 @@ dispatch_callback(rocprofiler_profile_counting_dispatch_data_t dispatch_data,
// time. If there is nothing in the cache, they will exit this scope
// and the lock will be automatically released.
auto rlock = std::shared_lock{profile_cache_mutex};
auto pos = dispatch_profile_cache.find(dispatch_data.dispatch_info.agent_id.handle);
if( dispatch_profile_cache.end() != pos ){
auto pos = rpsdk_profile_cache.find(dispatch_data.dispatch_info.agent_id.handle);
if( rpsdk_profile_cache.end() != pos ){
*config = pos->second;
}
return;
Expand Down Expand Up @@ -382,55 +386,12 @@ set_profile(rocprofiler_context_id_t context_id,
rocprofiler_agent_set_profile_callback_t set_config,
void*)
{
static std::shared_mutex m_mutex = {};
static std::unordered_map<uint64_t, rocprofiler_profile_config_id_t> profile_cache = {};

auto search_cache = [&]() {
auto pos = profile_cache.find(agent.handle);
if( profile_cache.end() != pos ){
set_config(context_id, pos->second);
return true;
}
return false;
};

{
auto rlock = std::shared_lock{m_mutex};
if(search_cache()) return;
}

auto wlock = std::unique_lock{m_mutex};
if(search_cache()) return;

// Create a collection profile for the counters
rocprofiler_profile_config_id_t profile;

std::vector<rocprofiler_counter_id_t> event_vid_list = {};
std::set<uint64_t> id_set = {};
for( int ei=0; ei<active_event_set_ctx->num_events; ei++ ){
auto e_inst = papi_id_to_event_instance.find( active_event_set_ctx->event_ids[ei] );
// If the event does not exist in the papi_id_to_event_instance map, ignore it.
if( papi_id_to_event_instance.end() == e_inst ){
continue;
}
rocprofiler_counter_id_t vid = e_inst->second.counter_info.id;
// If the vid of the event (base event) is not already in the event_vid_list, then add it.
if( id_set.find(vid.handle) == id_set.end() ){
event_vid_list.emplace_back( vid );
id_set.emplace( vid.handle );
}
auto rlock = std::shared_lock{profile_cache_mutex};
auto pos = rpsdk_profile_cache.find(agent.handle);
if( rpsdk_profile_cache.end() != pos ){
set_config(context_id, pos->second);
}

//Note: Right now, if a problem occurs, we can't tell which event caused the problem.
ROCPROFILER_CALL(rocprofiler_create_profile_config_FPTR(agent,
event_vid_list.data(),
event_vid_list.size(),
&profile),
"Could not construct profile cfg");

profile_cache.emplace(agent.handle, profile);
// Return the profile to collect those counters for this dispatch
set_config(context_id, profile);
return;
}


Expand Down Expand Up @@ -496,18 +457,18 @@ buffered_callback(rocprofiler_context_id_t,
// Print the returned counter data.
auto *record = static_cast<rocprofiler_record_counter_t*>(header->payload);
record_data.emplace_back(record);
ss << " (Id: " << record->id << " Value [D]: " << record->counter_value << ","
<< " user_data: " << record->user_data.value << ")\n";
}
}
std::cout << "[" << __FUNCTION__ << "]:\n" << ss.str() << "--------------------------------" << std::endl;

accum_values(record_data.data(), record_data.size());
for( int ei=0; ei<active_event_set_ctx->num_events; ei++ ){
std::cout << _counter_values[ei] << "\n";
{
std::lock_guard<std::mutex> lock(agent_mutex);
accum_values(record_data.data(), record_data.size());
data_is_ready = true;
}
std::cout << "------------------------" << std::endl;

// Notify read_sample() that the counter values have been accumulated into
// the global array _counter_values[]
agent_cond_var.notify_all();
}
#endif // defined(AGENT_PROFILE_MODE)

Expand All @@ -524,8 +485,8 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
if( RPSDK_MODE_AGENT_PROFILE == get_profiling_mode() ){
#if defined(AGENT_PROFILE_MODE)
ROCPROFILER_CALL(rocprofiler_create_buffer_FPTR(get_client_ctx(),
1024,
0,
32*1024,
16*1024,
ROCPROFILER_BUFFER_POLICY_LOSSLESS,
buffered_callback,
tool_data,
Expand Down Expand Up @@ -640,6 +601,11 @@ void stop_counting(void){
void
start_counting(vendorp_ctx_t ctx){

// Store a pointer to the counter value array in a global variable so that
// our functions that are called from the ROCprofiler-SDK (instead of our
// API) can still find the array.
_counter_values = ctx->counters;

#if defined(AGENT_PROFILE_MODE)
if( RPSDK_MODE_AGENT_PROFILE == get_profiling_mode() ){
for(auto act_dev_it=active_device_set.begin(); act_dev_it!=active_device_set.end(); ++act_dev_it){
Expand All @@ -655,21 +621,24 @@ start_counting(vendorp_ctx_t ctx){
}
#endif

_counter_values = ctx->counters;
ROCPROFILER_CALL(rocprofiler_start_context_FPTR(get_client_ctx()), "start context");
}

/* ** */
int
read_sample(){
int papi_errno = PAPI_OK;
static uint64_t count=0;

int ret_val = rocprofiler_sample_agent_profile_counting_service_FPTR(
get_client_ctx(), {.value = count}, ROCPROFILER_COUNTER_FLAG_NONE);
get_client_ctx(), {}, ROCPROFILER_COUNTER_FLAG_NONE);

if( ret_val == ROCPROFILER_STATUS_SUCCESS ){
++count;
data_is_ready = false;
ROCPROFILER_CALL(rocprofiler_flush_buffer_FPTR(get_buffer()), "buffer flush");
// rocprofiler_flush_buffer() will call buffered_callback() which will
// wake us up using this condition_variable.
std::unique_lock<std::mutex> lock(agent_mutex);
agent_cond_var.wait(lock, []{ return data_is_ready; });
}else{
goto fn_fail;
}
Expand Down Expand Up @@ -908,14 +877,14 @@ empty_active_event_set(void){

/* ** */
int
set_dispatch_profiles(vendorp_ctx_t ctx){
set_profile_cache(vendorp_ctx_t ctx){
std::map<uint64_t, std::vector<event_instance_info_t> > active_events_per_device;

// Acquire a unique lock so that no other thread can try to read
// the profile cache while we are modifying it.
auto wlock = std::unique_lock{profile_cache_mutex};

dispatch_profile_cache.clear();
rpsdk_profile_cache.clear();

for( int i=0; i < ctx->num_events; ++i) {
// make sure the event exists.
Expand Down Expand Up @@ -953,7 +922,7 @@ set_dispatch_profiles(vendorp_ctx_t ctx){
&profile),
"Could not construct profile cfg");

dispatch_profile_cache.emplace(agent->id.handle, profile);
rpsdk_profile_cache.emplace(agent->id.handle, profile);
}

return PAPI_OK;
Expand All @@ -973,6 +942,12 @@ int setup() {
char *error_msg = NULL;
int status = 0;

rpsdk_profiling_mode = RPSDK_MODE_CALLBACK_DISPATCH;
if( NULL != getenv("RPSDK_MODE_AGENT_PROFILE") ){
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems to be a PAPI-only environment variable; would it be better to prefix it with PAPI_?

// Warning: RPSDK_MODE_AGENT_PROFILE mode does not work properly yet, due to rocprofiler-sdk bugs.
rpsdk_profiling_mode = RPSDK_MODE_AGENT_PROFILE;
}

if ( NULL != pathname && strlen(pathname) <= PATH_MAX ) {
dllHandle = dlopen(pathname, RTLD_NOW | RTLD_GLOBAL);
}
Expand Down Expand Up @@ -1113,7 +1088,7 @@ rocprofiler_sdk_ctx_open(int *event_ids, int num_events, vendorp_ctx_t *ctx)
goto fn_fail;
}
papi_rocpsdk::active_event_set_ctx = *ctx;
papi_rocpsdk::set_dispatch_profiles(*ctx);
papi_rocpsdk::set_profile_cache(*ctx);

(*ctx)->state = RPSDK_AES_OPEN;

Expand All @@ -1129,7 +1104,6 @@ extern "C" int
rocprofiler_sdk_ctx_read(vendorp_ctx_t ctx, long long **counters)
{
int papi_errno = PAPI_OK;
static int count;

#if defined(AGENT_PROFILE_MODE)
if( RPSDK_MODE_AGENT_PROFILE == papi_rocpsdk::get_profiling_mode() ){
Expand Down
1 change: 1 addition & 0 deletions src/components/rocp_sdk/sdk_class.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include <list>
#include <mutex>
#include <shared_mutex>
#include <condition_variable>
#include <regex>
#include <string>
#include <string_view>
Expand Down
5 changes: 4 additions & 1 deletion src/components/rocp_sdk/tests/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@ CFLAGS = $(OPTFLAGS)
CPPFLAGS += $(INCLUDE)
LDFLAGS += $(PAPILIB) $(TESTLIB) $(UTILOBJS)

TESTS = simple advanced two_eventsets
TESTS = simple advanced two_eventsets simple_sampling
#TESTS = simple_sampling
template_tests: $(TESTS)

%.o: %.c
Expand All @@ -23,6 +24,8 @@ advanced: advanced.o kernel.o
two_eventsets: two_eventsets.o kernel.o
$(PAPI_ROCP_SDK_ROOT)/bin/amdclang++ -O2 -g -DNDEBUG --offload-arch=gfx90a --offload-arch=gfx90a --hip-link --rtlib=compiler-rt -unwindlib=libgcc two_eventsets.o kernel.o -o two_eventsets $(PAPI_ROCP_SDK_ROOT)/lib/libamdhip64.so.6 $(LDFLAGS)

simple_sampling: simple_sampling.o kernel.o
$(PAPI_ROCP_SDK_ROOT)/bin/amdclang++ -O2 -g -DNDEBUG --offload-arch=gfx90a --offload-arch=gfx90a --hip-link --rtlib=compiler-rt -unwindlib=libgcc simple_sampling.o kernel.o -o simple_sampling $(PAPI_ROCP_SDK_ROOT)/lib/libamdhip64.so.6 $(LDFLAGS)

clean:
rm -f $(TESTS) *.o
4 changes: 2 additions & 2 deletions src/components/rocp_sdk/tests/advanced.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ int main(int argc, char *argv[])
printf("--------------------- launch_kernel(1)\n");
launch_kernel(1);

sleep(1);
usleep(1000);

papi_errno = PAPI_read(eventset, counters);
if (papi_errno != PAPI_OK) {
Expand Down Expand Up @@ -83,7 +83,7 @@ int main(int argc, char *argv[])
printf("--------------------- launch_kernel(1)\n");
launch_kernel(1);

sleep(1);
usleep(1000);

papi_errno = PAPI_read(eventset, counters);
if (papi_errno != PAPI_OK) {
Expand Down
2 changes: 1 addition & 1 deletion src/components/rocp_sdk/tests/simple.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ int main(int argc, char *argv[])
printf("--------------------- launch_kernel(0)\n");
launch_kernel(0);

sleep(1);
usleep(1000);

papi_errno = PAPI_read(eventset, counters);
if (papi_errno != PAPI_OK) {
Expand Down
Loading