Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Deadlock issue caused in listResources (argoproj/argo-cd#18902) #599

Closed
wants to merge 2 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 10 additions & 6 deletions pkg/cache/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -527,11 +527,14 @@ func runSynced(lock sync.Locker, action func() error) error {
}

// listResources creates list pager and enforces number of concurrent list requests
func (c *clusterCache) listResources(ctx context.Context, resClient dynamic.ResourceInterface, callback func(*pager.ListPager) error) (string, error) {
if err := c.listSemaphore.Acquire(ctx, 1); err != nil {
return "", err
func (c *clusterCache) listResources(ctx context.Context, resClient dynamic.ResourceInterface, takeSemaphore bool, callback func(*pager.ListPager) error) (string, error) {
Copy link
Member

@agaudreault agaudreault Jul 11, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was working on the "same" issue. I had the idea to skip the semaphore, but while it would solve the problem, it disables the feature added by 605958d

See PR suggestion #604.

Both PRs solve the issue, let's discuss in this thread what option is the best one. @gdsoumya you added the replaceResourceCache as part of the listResource callback (#532). Any specific reason?

if takeSemaphore {
if err := c.listSemaphore.Acquire(ctx, 1); err != nil {
return "", err
}
defer c.listSemaphore.Release(1)
}
defer c.listSemaphore.Release(1)

var retryCount int64 = 0
resourceVersion := ""
listPager := pager.New(func(ctx context.Context, opts metav1.ListOptions) (runtime.Object, error) {
Expand Down Expand Up @@ -568,7 +571,8 @@ func (c *clusterCache) listResources(ctx context.Context, resClient dynamic.Reso
}

func (c *clusterCache) loadInitialState(ctx context.Context, api kube.APIResourceInfo, resClient dynamic.ResourceInterface, ns string, lock bool) (string, error) {
return c.listResources(ctx, resClient, func(listPager *pager.ListPager) error {
// If we are intending to take the lock in our callback, we need to skip taking the semaphore in listResources to avoid deadlock.
return c.listResources(ctx, resClient, lock, func(listPager *pager.ListPager) error {
var items []*Resource
err := listPager.EachListItem(ctx, metav1.ListOptions{}, func(obj runtime.Object) error {
if un, ok := obj.(*unstructured.Unstructured); !ok {
Expand Down Expand Up @@ -859,7 +863,7 @@ func (c *clusterCache) sync() error {
lock.Unlock()

return c.processApi(client, api, func(resClient dynamic.ResourceInterface, ns string) error {
resourceVersion, err := c.listResources(ctx, resClient, func(listPager *pager.ListPager) error {
resourceVersion, err := c.listResources(ctx, resClient, true, func(listPager *pager.ListPager) error {
return listPager.EachListItem(context.Background(), metav1.ListOptions{}, func(obj runtime.Object) error {
if un, ok := obj.(*unstructured.Unstructured); !ok {
return fmt.Errorf("object %s/%s has an unexpected type", un.GroupVersionKind().String(), un.GetName())
Expand Down
Loading