From 5664b2f9660ada1b1ece34d86c83328b909ba791 Mon Sep 17 00:00:00 2001 From: Andrii Korotkov Date: Sun, 16 Jun 2024 00:24:55 -0700 Subject: [PATCH] fix: Put app to the operation queue after refresh queue processing to avoid race condition - Fixes [#18500] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adding app to the operation queue and refresh queue could cause waiting for resource for minutes to tens of minutes. Sync state operates on resources gathered from reconciliation, so if the app operation event is processed before the refresh one (when triggered on resource update/creation), the refresh doesn’t help sync to progress and it essentially needs to wait for another app refresh. The fix seems to be to schedule app operation event after refresh event is finished processing. There’s one place where operation event is scheduled without refresh event (which can be kept there), and one place where refresh even is scheduled without the operation one during the app deletion handling https://github.com/argoproj/argo-cd/blob/3e2cfb138795e24df98c9745d813e4b7f1720dbd/controller/appcontroller.go#L2177. It’s probably safe to schedule operation even after that, since it has some code to check that app was deleted. If not, an update can be made to have refresh queue storing a tuple with app key and bool whether to enqueue app operation. If there are issues: try keeping both old places to add to app operation queue and new addition after refresh. Note on cherry pick: add to as many releases as you can. This can be a significant performance boost. Signed-off-by: Andrii Korotkov --- controller/appcontroller.go | 10 ++++--- test/e2e/app_management_test.go | 43 +++++++++++++++++++++++++++++ test/e2e/fixture/app/expectation.go | 18 ++++++++++-- 3 files changed, 65 insertions(+), 6 deletions(-) diff --git a/controller/appcontroller.go b/controller/appcontroller.go index f608e53d5aac6..005e5524cfc5d 100644 --- a/controller/appcontroller.go +++ b/controller/appcontroller.go @@ -910,10 +910,8 @@ func (ctrl *ApplicationController) requestAppRefresh(appName string, compareWith } if after != nil { ctrl.appRefreshQueue.AddAfter(key, *after) - ctrl.appOperationQueue.AddAfter(key, *after) } else { ctrl.appRefreshQueue.AddRateLimited(key) - ctrl.appOperationQueue.AddRateLimited(key) } } } @@ -1548,6 +1546,9 @@ func (ctrl *ApplicationController) processAppRefreshQueueItem() (processNext boo if r := recover(); r != nil { log.Errorf("Recovered from panic: %+v\n%s", r, debug.Stack()) } + // We want to have app operation update happen after the sync, so there's no race condition + // and app updates not proceeding. See https://github.com/argoproj/argo-cd/issues/18500. + ctrl.appOperationQueue.AddRateLimited(appKey) ctrl.appRefreshQueue.Done(appKey) }() obj, exists, err := ctrl.appInformer.GetIndexer().GetByKey(appKey.(string)) @@ -2221,7 +2222,6 @@ func (ctrl *ApplicationController) newApplicationInformerAndLister() (cache.Shar key, err := cache.MetaNamespaceKeyFunc(obj) if err == nil { ctrl.appRefreshQueue.AddRateLimited(key) - ctrl.appOperationQueue.AddRateLimited(key) } newApp, newOK := obj.(*appv1.Application) if err == nil && newOK { @@ -2256,7 +2256,9 @@ func (ctrl *ApplicationController) newApplicationInformerAndLister() (cache.Shar } ctrl.requestAppRefresh(newApp.QualifiedName(), compareWith, delay) - ctrl.appOperationQueue.AddRateLimited(key) + if !newOK || (delay != nil && *delay != time.Duration(0)) { + ctrl.appOperationQueue.AddRateLimited(key) + } ctrl.clusterSharding.UpdateApp(newApp) }, DeleteFunc: func(obj interface{}) { diff --git a/test/e2e/app_management_test.go b/test/e2e/app_management_test.go index 295496075ae9a..6af70990cecd8 100644 --- a/test/e2e/app_management_test.go +++ b/test/e2e/app_management_test.go @@ -2879,3 +2879,46 @@ func TestAnnotationTrackingExtraResources(t *testing.T) { Expect(SyncStatusIs(SyncStatusCodeSynced)). Expect(HealthIs(health.HealthStatusHealthy)) } + +func TestCreateConfigMapsAndWaitForUpdate(t *testing.T) { + Given(t). + Path("config-map"). + When(). + CreateApp(). + Sync(). + Then(). + And(func(app *Application) { + _, err := RunCli("app", "set", app.Name, "--sync-policy", "automated") + require.NoError(t, err) + }). + When(). + AddFile("other-configmap.yaml", ` +apiVersion: v1 +kind: ConfigMap +metadata: + name: other-map + annotations: + argocd.argoproj.io/sync-wave: "1" +data: + foo2: bar2`). + AddFile("yet-another-configmap.yaml", ` +apiVersion: v1 +kind: ConfigMap +metadata: + name: yet-another-map + annotations: + argocd.argoproj.io/sync-wave: "2" +data: + foo3: bar3`). + PatchFile("kustomization.yaml", `[{"op": "add", "path": "/resources/-", "value": "other-configmap.yaml"}, {"op": "add", "path": "/resources/-", "value": "yet-another-configmap.yaml"}]`). + Refresh(RefreshTypeNormal). + Wait(). + Then(). + Expect(OperationPhaseIs(OperationSucceeded)). + Expect(SyncStatusIs(SyncStatusCodeSynced)). + Expect(HealthIs(health.HealthStatusHealthy)). + Expect(ResourceHealthWithNamespaceIs("ConfigMap", "other-map", DeploymentNamespace(), health.HealthStatusHealthy)). + Expect(ResourceSyncStatusWithNamespaceIs("ConfigMap", "other-map", DeploymentNamespace(), SyncStatusCodeSynced)). + Expect(ResourceHealthWithNamespaceIs("ConfigMap", "yet-another-map", DeploymentNamespace(), health.HealthStatusHealthy)). + Expect(ResourceSyncStatusWithNamespaceIs("ConfigMap", "yet-another-map", DeploymentNamespace(), SyncStatusCodeSynced)) +} diff --git a/test/e2e/fixture/app/expectation.go b/test/e2e/fixture/app/expectation.go index 8546a4eed7be9..b5e83a664085c 100644 --- a/test/e2e/fixture/app/expectation.go +++ b/test/e2e/fixture/app/expectation.go @@ -143,14 +143,28 @@ func ResourceSyncStatusWithNamespaceIs(kind, resource, namespace string, expecte func ResourceHealthIs(kind, resource string, expected health.HealthStatusCode) Expectation { return func(c *Consequences) (state, string) { - actual := c.resource(kind, resource, "").Health.Status + var actual health.HealthStatusCode + resourceHealth := c.resource(kind, resource, "").Health + if resourceHealth != nil { + actual = resourceHealth.Status + } else { + // Some resources like ConfigMap may not have health status when they are okay + actual = health.HealthStatusHealthy + } return simple(actual == expected, fmt.Sprintf("resource '%s/%s' health should be %s, is %s", kind, resource, expected, actual)) } } func ResourceHealthWithNamespaceIs(kind, resource, namespace string, expected health.HealthStatusCode) Expectation { return func(c *Consequences) (state, string) { - actual := c.resource(kind, resource, namespace).Health.Status + var actual health.HealthStatusCode + resourceHealth := c.resource(kind, resource, namespace).Health + if resourceHealth != nil { + actual = resourceHealth.Status + } else { + // Some resources like ConfigMap may not have health status when they are okay + actual = health.HealthStatusHealthy + } return simple(actual == expected, fmt.Sprintf("resource '%s/%s' health should be %s, is %s", kind, resource, expected, actual)) } }