Skip to content

Commit

Permalink
fix: Put app to the operation queue after refresh queue processing to…
Browse files Browse the repository at this point in the history
… avoid race condition - Fixes [#18500]

Adding app to the operation queue and refresh queue could cause waiting for resource for minutes to tens of minutes.
Sync state operates on resources gathered from reconciliation, so if the app operation event is processed before the refresh one (when triggered on resource update/creation), the refresh doesn’t help sync to progress and it essentially needs to wait for another app refresh.

The fix seems to be to schedule app operation event after refresh event is finished processing. There’s one place where operation event is scheduled without refresh event (which can be kept there), and one place where refresh even is scheduled without the operation one during the app deletion handling https://github.com/argoproj/argo-cd/blob/3e2cfb138795e24df98c9745d813e4b7f1720dbd/controller/appcontroller.go#L2177. It’s probably safe to schedule operation even after that, since it has some code to check that app was deleted. If not, an update can be made to have refresh queue storing a tuple with app key and bool whether to enqueue app operation.

If there are issues: try keeping both old places to add to app operation queue and new addition after refresh.

Note on cherry pick: add to as many releases as you can. This can be a significant performance boost.

Signed-off-by: Andrii Korotkov <andrii.korotkov@verkada.com>
  • Loading branch information
andrii-korotkov-verkada committed Jul 27, 2024
1 parent 929e855 commit 71a4913
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 6 deletions.
10 changes: 6 additions & 4 deletions controller/appcontroller.go
Original file line number Diff line number Diff line change
Expand Up @@ -910,10 +910,8 @@ func (ctrl *ApplicationController) requestAppRefresh(appName string, compareWith
}
if after != nil {
ctrl.appRefreshQueue.AddAfter(key, *after)
ctrl.appOperationQueue.AddAfter(key, *after)
} else {
ctrl.appRefreshQueue.AddRateLimited(key)
ctrl.appOperationQueue.AddRateLimited(key)
}
}
}
Expand Down Expand Up @@ -1548,6 +1546,9 @@ func (ctrl *ApplicationController) processAppRefreshQueueItem() (processNext boo
if r := recover(); r != nil {
log.Errorf("Recovered from panic: %+v\n%s", r, debug.Stack())
}
// We want to have app operation update happen after the sync, so there's no race condition
// and app updates not proceeding. See https://github.com/argoproj/argo-cd/issues/18500.
ctrl.appOperationQueue.AddRateLimited(appKey)
ctrl.appRefreshQueue.Done(appKey)
}()
obj, exists, err := ctrl.appInformer.GetIndexer().GetByKey(appKey.(string))
Expand Down Expand Up @@ -2221,7 +2222,6 @@ func (ctrl *ApplicationController) newApplicationInformerAndLister() (cache.Shar
key, err := cache.MetaNamespaceKeyFunc(obj)
if err == nil {
ctrl.appRefreshQueue.AddRateLimited(key)
ctrl.appOperationQueue.AddRateLimited(key)
}
newApp, newOK := obj.(*appv1.Application)
if err == nil && newOK {
Expand Down Expand Up @@ -2256,7 +2256,9 @@ func (ctrl *ApplicationController) newApplicationInformerAndLister() (cache.Shar
}

ctrl.requestAppRefresh(newApp.QualifiedName(), compareWith, delay)
ctrl.appOperationQueue.AddRateLimited(key)
if delay != nil && *delay != time.Duration(0) {
ctrl.appOperationQueue.AddRateLimited(key)
}
ctrl.clusterSharding.UpdateApp(newApp)
},
DeleteFunc: func(obj interface{}) {
Expand Down
43 changes: 43 additions & 0 deletions test/e2e/app_management_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2879,3 +2879,46 @@ func TestAnnotationTrackingExtraResources(t *testing.T) {
Expect(SyncStatusIs(SyncStatusCodeSynced)).
Expect(HealthIs(health.HealthStatusHealthy))
}

func TestCreateConfigMapsAndWaitForUpdate(t *testing.T) {
Given(t).
Path("config-map").
When().
CreateApp().
Sync().
Then().
And(func(app *Application) {
_, err := RunCli("app", "set", app.Name, "--sync-policy", "automated")
require.NoError(t, err)
}).
When().
AddFile("other-configmap.yaml", `
apiVersion: v1
kind: ConfigMap
metadata:
name: other-map
annotations:
argocd.argoproj.io/sync-wave: "1"
data:
foo2: bar2`).
AddFile("yet-another-configmap.yaml", `
apiVersion: v1
kind: ConfigMap
metadata:
name: yet-another-map
annotations:
argocd.argoproj.io/sync-wave: "2"
data:
foo3: bar3`).
PatchFile("kustomization.yaml", `[{"op": "add", "path": "/resources/-", "value": "other-configmap.yaml"}, {"op": "add", "path": "/resources/-", "value": "yet-another-configmap.yaml"}]`).
Refresh(RefreshTypeNormal).
Wait().
Then().
Expect(OperationPhaseIs(OperationSucceeded)).
Expect(SyncStatusIs(SyncStatusCodeSynced)).
Expect(HealthIs(health.HealthStatusHealthy)).
Expect(ResourceHealthWithNamespaceIs("ConfigMap", "other-map", DeploymentNamespace(), health.HealthStatusHealthy)).
Expect(ResourceSyncStatusWithNamespaceIs("ConfigMap", "other-map", DeploymentNamespace(), SyncStatusCodeSynced)).
Expect(ResourceHealthWithNamespaceIs("ConfigMap", "yet-another-map", DeploymentNamespace(), health.HealthStatusHealthy)).
Expect(ResourceSyncStatusWithNamespaceIs("ConfigMap", "yet-another-map", DeploymentNamespace(), SyncStatusCodeSynced))
}
18 changes: 16 additions & 2 deletions test/e2e/fixture/app/expectation.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,14 +143,28 @@ func ResourceSyncStatusWithNamespaceIs(kind, resource, namespace string, expecte

func ResourceHealthIs(kind, resource string, expected health.HealthStatusCode) Expectation {
return func(c *Consequences) (state, string) {
actual := c.resource(kind, resource, "").Health.Status
var actual health.HealthStatusCode
resourceHealth := c.resource(kind, resource, "").Health
if resourceHealth != nil {
actual = resourceHealth.Status
} else {
// Some resources like ConfigMap may not have health status when they are okay
actual = health.HealthStatusHealthy
}
return simple(actual == expected, fmt.Sprintf("resource '%s/%s' health should be %s, is %s", kind, resource, expected, actual))
}
}

func ResourceHealthWithNamespaceIs(kind, resource, namespace string, expected health.HealthStatusCode) Expectation {
return func(c *Consequences) (state, string) {
actual := c.resource(kind, resource, namespace).Health.Status
var actual health.HealthStatusCode
resourceHealth := c.resource(kind, resource, namespace).Health
if resourceHealth != nil {
actual = resourceHealth.Status
} else {
// Some resources like ConfigMap may not have health status when they are okay
actual = health.HealthStatusHealthy
}
return simple(actual == expected, fmt.Sprintf("resource '%s/%s' health should be %s, is %s", kind, resource, expected, actual))
}
}
Expand Down

0 comments on commit 71a4913

Please sign in to comment.