Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[EventPipe] Block EventPipeProvider Deletion for ongoing callbacks #106040

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,9 @@ internal override unsafe void Register(EventSource eventSource)
}

// Unregister an event provider.
// Calling Unregister within a Callback will result in a deadlock
// as deleting the provider with an active tracing session will block
// until all of the provider's callbacks are completed.
internal override void Unregister()
{
if (_provHandle != 0)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ test_provider_callback_data_queue (void)
1,
EP_EVENT_LEVEL_LOGALWAYS,
true,
0);
0,
NULL);
ep_provider_callback_data_queue_enqueue (provider_callback_data_queue, provider_enqueue_callback_data);
ep_provider_callback_data_fini (provider_enqueue_callback_data);
}
Expand Down
30 changes: 28 additions & 2 deletions src/native/eventpipe/ep-provider.c
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,11 @@ provider_prepare_callback_data (
EP_ASSERT (provider != NULL);
EP_ASSERT (provider_callback_data != NULL);

ep_requires_lock_held ();

if (provider->callback_func != NULL)
lateralusX marked this conversation as resolved.
Show resolved Hide resolved
provider->callbacks_pending++;

return ep_provider_callback_data_init (
provider_callback_data,
filter_data,
Expand All @@ -86,7 +91,8 @@ provider_prepare_callback_data (
keywords,
provider_level,
provider->sessions != 0,
session_id);
session_id,
provider);
}

static
Expand Down Expand Up @@ -193,13 +199,17 @@ ep_provider_alloc (
instance->event_list = dn_list_alloc ();
ep_raise_error_if_nok (instance->event_list != NULL);

ep_rt_wait_event_alloc (&instance->callbacks_complete_event, true /* bManual */, false /* bInitial */);
ep_raise_error_if_nok (ep_rt_wait_event_is_valid (&instance->callbacks_complete_event));

instance->keywords = 0;
instance->provider_level = EP_EVENT_LEVEL_CRITICAL;
instance->callback_func = callback_func;
instance->callback_data = callback_data;
instance->config = config;
instance->delete_deferred = false;
instance->sessions = 0;
instance->callbacks_pending = 0;

ep_on_exit:
return instance;
Expand All @@ -225,6 +235,7 @@ ep_provider_free (EventPipeProvider * provider)
}

ep_on_exit:
ep_rt_wait_event_free (&provider->callbacks_complete_event);
ep_rt_utf16_string_free (provider->provider_name_utf16);
ep_rt_utf8_string_free (provider->provider_name);
ep_rt_object_free (provider);
Expand Down Expand Up @@ -363,7 +374,9 @@ provider_invoke_callback (EventPipeProviderCallbackData *provider_callback_data)
{
EP_ASSERT (provider_callback_data != NULL);

// Lock should not be held when invoking callback.
// A lock should not be held when invoking the callback, as concurrent callbacks
// may trigger a deadlock with the EventListenersLock as detailed in
// https://github.com/dotnet/runtime/pull/105734
ep_requires_lock_not_held ();

const ep_char8_t *filter_data = ep_provider_callback_data_get_filter_data (provider_callback_data);
Expand Down Expand Up @@ -427,6 +440,19 @@ provider_invoke_callback (EventPipeProviderCallbackData *provider_callback_data)
callback_data /* CallbackContext */);
}

// The callback completed, can take the lock again.
lateralusX marked this conversation as resolved.
Show resolved Hide resolved
EP_LOCK_ENTER (section1)
if (callback_function != NULL) {
EventPipeProvider *provider = provider_callback_data->provider;
provider->callbacks_pending--;
if (provider->callbacks_pending == 0 && provider->callback_func == NULL) {
// ep_delete_provider deferred provider deletion and is waiting for all in-flight callbacks
// to complete. This is the last callback, so signal completion.
ep_rt_wait_event_set (&provider->callbacks_complete_event);
}
}
EP_LOCK_EXIT (section1)

ep_on_exit:
if (is_event_filter_desc_init)
ep_event_filter_desc_fini (&event_filter_desc);
Expand Down
6 changes: 6 additions & 0 deletions src/native/eventpipe/ep-provider.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,12 @@ struct _EventPipeProvider_Internal {
// True if the provider has been deleted, but that deletion
// has been deferred until tracing is stopped.
bool delete_deferred;
// The number of pending EventPipeProvider callbacks that have
// not completed.
int64_t callbacks_pending;
// Event object used to signal eventpipe provider deletion
// that all in flight callbacks have completed.
ep_rt_wait_event_handle_t callbacks_complete_event;
};

#if !defined(EP_INLINE_GETTER_SETTER) && !defined(EP_IMPL_PROVIDER_GETTER_SETTER)
Expand Down
7 changes: 5 additions & 2 deletions src/native/eventpipe/ep-types.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ struct _EventPipeProviderCallbackData_Internal {
EventPipeEventLevel provider_level;
bool enabled;
EventPipeSessionID session_id;
EventPipeProvider *provider;
};

#if !defined(EP_INLINE_GETTER_SETTER) && !defined(EP_IMPL_EP_GETTER_SETTER)
Expand All @@ -100,7 +101,8 @@ ep_provider_callback_data_alloc (
int64_t keywords,
EventPipeEventLevel provider_level,
bool enabled,
EventPipeSessionID session_id);
EventPipeSessionID session_id,
EventPipeProvider *provider);

EventPipeProviderCallbackData *
ep_provider_callback_data_alloc_copy (EventPipeProviderCallbackData *provider_callback_data_src);
Expand All @@ -117,7 +119,8 @@ ep_provider_callback_data_init (
int64_t keywords,
EventPipeEventLevel provider_level,
bool enabled,
EventPipeSessionID session_id);
EventPipeSessionID session_id,
EventPipeProvider *provider);

EventPipeProviderCallbackData *
ep_provider_callback_data_init_copy (
Expand Down
28 changes: 25 additions & 3 deletions src/native/eventpipe/ep.c
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,8 @@ ep_provider_callback_data_alloc (
int64_t keywords,
EventPipeEventLevel provider_level,
bool enabled,
EventPipeSessionID session_id)
EventPipeSessionID session_id,
EventPipeProvider *provider)
{
EventPipeProviderCallbackData *instance = ep_rt_object_alloc (EventPipeProviderCallbackData);
ep_raise_error_if_nok (instance != NULL);
Expand All @@ -238,7 +239,8 @@ ep_provider_callback_data_alloc (
keywords,
provider_level,
enabled,
session_id) != NULL);
session_id,
provider) != NULL);

ep_on_exit:
return instance;
Expand Down Expand Up @@ -298,7 +300,8 @@ ep_provider_callback_data_init (
int64_t keywords,
EventPipeEventLevel provider_level,
bool enabled,
EventPipeSessionID session_id)
EventPipeSessionID session_id,
EventPipeProvider *provider)
{
EP_ASSERT (provider_callback_data != NULL);

Expand All @@ -309,6 +312,7 @@ ep_provider_callback_data_init (
provider_callback_data->provider_level = provider_level;
provider_callback_data->enabled = enabled;
provider_callback_data->session_id = session_id;
provider_callback_data->provider = provider;

return provider_callback_data;
}
Expand Down Expand Up @@ -1313,15 +1317,33 @@ ep_delete_provider (EventPipeProvider *provider)
// Take the lock to make sure that we don't have a race
// between disabling tracing and deleting a provider
// where we hold a provider after tracing has been disabled.
bool wait_for_provider_callbacks_completion = false;
EP_LOCK_ENTER (section1)
if (enabled ()) {
// Save the provider until the end of the tracing session.
ep_provider_set_delete_deferred (provider, true);

// The callback func must be previously set to null,
// otherwise callbacks might never stop coming.
EP_ASSERT (provider->callback_func == NULL);

// Calling ep_delete_provider within a Callback will result in a deadlock
// as deleting the provider with an active tracing session will block
// until all of the provider's callbacks are completed.
if (provider->callbacks_pending > 0)
wait_for_provider_callbacks_completion = true;
mdh1418 marked this conversation as resolved.
Show resolved Hide resolved
} else {
config_delete_provider (ep_config_get (), provider);
}
EP_LOCK_EXIT (section1)

// Block provider deletion until all pending callbacks are completed.
// Helps prevent the EventPipeEventProvider Unregister logic from
// freeing freeing the provider's weak reference gchandle before
// callbacks using that handle have completed.
if (wait_for_provider_callbacks_completion)
mdh1418 marked this conversation as resolved.
Show resolved Hide resolved
ep_rt_wait_event_wait (&provider->callbacks_complete_event, EP_INFINITE_WAIT, false);

ep_on_exit:
ep_requires_lock_not_held ();
return;
Expand Down
6 changes: 0 additions & 6 deletions src/tests/issues.targets
Original file line number Diff line number Diff line change
Expand Up @@ -404,9 +404,6 @@
<ExcludeList Include="$(XunitTestBinBase)/tracing/runtimeeventsource/nativeruntimeeventsource/*">
<Issue>https://github.com/dotnet/runtime/issues/68690</Issue>
</ExcludeList>
<ExcludeList Include="$(XunitTestBinBase)/tracing/eventpipe/eventsourceerror/eventsourceerror/*">
<Issue>https://github.com/dotnet/runtime/issues/80666</Issue>
</ExcludeList>
<ExcludeList Include="$(XunitTestBinBase)/JIT/jit64/regress/vsw/373472/**">
<Issue>Allocates large contiguous array that is not consistently available on 32-bit platforms</Issue>
</ExcludeList>
Expand All @@ -420,9 +417,6 @@
<ExcludeList Include="$(XunitTestBinBase)/profiler/eventpipe/eventpipe/*">
<Issue>https://github.com/dotnet/runtime/issues/66174</Issue>
</ExcludeList>
<ExcludeList Include="$(XunitTestBinBase)/tracing/eventpipe/eventsourceerror/eventsourceerror/*">
<Issue>https://github.com/dotnet/runtime/issues/80666</Issue>
</ExcludeList>
</ItemGroup>

<!-- OSX x64 on CoreCLR Runtime -->
Expand Down
Loading