From 937c8c026a9d9a9a88ca00038f3a32d647f2ae07 Mon Sep 17 00:00:00 2001 From: Lee Bernick Date: Fri, 23 Jun 2023 10:17:58 -0400 Subject: [PATCH] Fail PipelineRun when it can't create Runs Prior to this commit, if a PipelineRun couldn't create TaskRuns, the resulting error would be requeued and the PipelineRun controller would continue to retry. The PipelineRun status would represent the outcome of a previous reconcile loop, which sometimes resulted in PipelineRun statuses that didn't match what was occuring on the cluster. This commit marks a PipelineRun as failed and returns a permanent error if TaskRun or CustomRun creation fails for a non-retryable reason. --- pkg/reconciler/pipelinerun/pipelinerun.go | 24 ++- .../pipelinerun/pipelinerun_test.go | 139 +++++++++++++++++- 2 files changed, 150 insertions(+), 13 deletions(-) diff --git a/pkg/reconciler/pipelinerun/pipelinerun.go b/pkg/reconciler/pipelinerun/pipelinerun.go index fac1e3c468e..eab82d3852c 100644 --- a/pkg/reconciler/pipelinerun/pipelinerun.go +++ b/pkg/reconciler/pipelinerun/pipelinerun.go @@ -57,6 +57,7 @@ import ( "go.opentelemetry.io/otel/trace" "go.uber.org/zap" corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" k8slabels "k8s.io/apimachinery/pkg/labels" @@ -769,13 +770,6 @@ func (c *Reconciler) runNextSchedulableTask(ctx context.Context, pr *v1.Pipeline } } - defer func() { - // If it is a permanent error, set pipelinerun to a failure state directly to avoid unnecessary retries. - if err != nil && controller.IsPermanentError(err) { - pr.Status.MarkFailed(ReasonCreateRunFailed, err.Error()) - } - }() - if rpt.IsCustomTask() { rpt.CustomRuns, err = c.createCustomRuns(ctx, rpt, pr) if err != nil { @@ -821,6 +815,7 @@ func (c *Reconciler) createTaskRuns(ctx context.Context, rpt *resources.Resolved } taskRun, err := c.createTaskRun(ctx, taskRunName, params, rpt, pr) if err != nil { + err := c.handleRunCreationError(ctx, pr, err) return nil, err } taskRuns = append(taskRuns, taskRun) @@ -886,6 +881,20 @@ func (c *Reconciler) createTaskRun(ctx context.Context, taskRunName string, para return c.PipelineClientSet.TektonV1().TaskRuns(pr.Namespace).Create(ctx, tr, metav1.CreateOptions{}) } +// handleRunCreationError marks the PipelineRun as failed and returns a permanent error if the run creation error is not retryable +func (c *Reconciler) handleRunCreationError(ctx context.Context, pr *v1.PipelineRun, err error) error { + if controller.IsPermanentError(err) { + pr.Status.MarkFailed(ReasonCreateRunFailed, err.Error()) + return err + } + // This is not a complete list of permanent errors. Any permanent error with TaskRun/CustomRun creation can be added here. + if apierrors.IsInvalid(err) || apierrors.IsBadRequest(err) { + pr.Status.MarkFailed(ReasonCreateRunFailed, err.Error()) + return controller.NewPermanentError(err) + } + return err +} + func (c *Reconciler) createCustomRuns(ctx context.Context, rpt *resources.ResolvedPipelineTask, pr *v1.PipelineRun) ([]*v1beta1.CustomRun, error) { var customRuns []*v1beta1.CustomRun ctx, span := c.tracerProvider.Tracer(TracerName).Start(ctx, "createCustomRuns") @@ -902,6 +911,7 @@ func (c *Reconciler) createCustomRuns(ctx context.Context, rpt *resources.Resolv } customRun, err := c.createCustomRun(ctx, customRunName, params, rpt, pr) if err != nil { + err := c.handleRunCreationError(ctx, pr, err) return nil, err } customRuns = append(customRuns, customRun) diff --git a/pkg/reconciler/pipelinerun/pipelinerun_test.go b/pkg/reconciler/pipelinerun/pipelinerun_test.go index 3d0a631ac74..0d3da0a5e88 100644 --- a/pkg/reconciler/pipelinerun/pipelinerun_test.go +++ b/pkg/reconciler/pipelinerun/pipelinerun_test.go @@ -58,10 +58,13 @@ import ( "gomodules.xyz/jsonpatch/v2" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/validation/field" fakek8s "k8s.io/client-go/kubernetes/fake" ktesting "k8s.io/client-go/testing" "k8s.io/client-go/tools/record" @@ -6600,7 +6603,6 @@ func (prt PipelineRunTest) reconcileRun(namespace, pipelineRunName string, wantE // Check generated events match what's expected if len(wantEvents) > 0 { if err := k8sevent.CheckEventsOrdered(prt.Test, prt.TestAssets.Recorder.Events, pipelineRunName, wantEvents); err != nil { - prt.Test.Fatalf("falal added: %s:", err.Error()) prt.Test.Errorf(err.Error()) } } @@ -13113,12 +13115,12 @@ spec: defer prt.Cancel() wantEvents := []string{ - "Normal Started", - "Warning TaskRunsCreationFailed", - "error creating TaskRuns called \\[test-pipeline-run-with-create-run-failed-hello-world]\\ for PipelineTask hello-world from PipelineRun test-pipeline-run-with-create-run-failed: expected workspace \"source\" to be provided by pipelinerun for pipeline task \"hello-world\"", - "error creating TaskRuns called \\[test-pipeline-run-with-create-run-failed-hello-world]\\ for PipelineTask hello-world from PipelineRun test-pipeline-run-with-create-run-failed: expected workspace \"source\" to be provided by pipelinerun for pipeline task \"hello-world\"", + "Normal Started ", + "Warning TaskRunsCreationFailed Failed to create TaskRuns [\"test-pipeline-run-with-create-run-failed-hello-world\"]: expected workspace \"source\" to be provided by pipelinerun for pipeline task \"hello-world\"", + "Warning Failed expected workspace \"source\" to be provided by pipelinerun for pipeline task \"hello-world\"", + "Warning InternalError 1 error occurred:\n\t* error creating TaskRuns called [test-pipeline-run-with-create-run-failed-hello-world] for PipelineTask hello-world from PipelineRun test-pipeline-run-with-create-run-failed: expected workspace \"source\" to be provided by pipelinerun for pipeline task \"hello-world\"\n\n", } - reconciledRun, _ := prt.reconcileRun("foo", "test-pipeline-run-with-create-run-failed", wantEvents, true) + reconciledRun, _ := prt.reconcileRun("foo", "test-pipeline-run-with-create-run-failed", wantEvents, true /* permanentError */) if reconciledRun.Status.CompletionTime == nil { t.Errorf("Expected a CompletionTime on invalid PipelineRun but was nil") @@ -13130,6 +13132,131 @@ spec: } } +func TestHandleTaskRunCreationError(t *testing.T) { + prName := "taskrun-creation-fails" + namespace := "default" + pr := parse.MustParseV1PipelineRun(t, fmt.Sprintf(` +metadata: + name: %s + namespace: %s +spec: + pipelineSpec: + tasks: + - name: hello-world + taskSpec: + steps: + - image: busybox + script: echo hello +`, prName, namespace)) + taskRunGK := schema.GroupKind{Group: "tekton.dev/v1", Kind: "taskrun"} + + tcs := []struct { + name string + creationErr error + }{{ + name: "invalid", + creationErr: apierrors.NewInvalid(taskRunGK, fmt.Sprintf("%s-hello-world", prName), field.ErrorList{}), + }, { + name: "bad request", + creationErr: apierrors.NewBadRequest("bad request"), + }} + for _, tc := range tcs { + d := test.Data{ + PipelineRuns: []*v1.PipelineRun{pr.DeepCopy()}, + } + testAssets, cancel := getPipelineRunController(t, d) + defer cancel() + c := testAssets.Controller + clients := testAssets.Clients + + // Create an error when the Pipeline client attempts to create TaskRuns + clients.Pipeline.PrependReactor("create", "taskruns", func(action ktesting.Action) (bool, runtime.Object, error) { + return true, nil, tc.creationErr + }) + err := c.Reconciler.Reconcile(testAssets.Ctx, fmt.Sprintf("%s/%s", namespace, prName)) + if !controller.IsPermanentError(err) { + t.Errorf("expected permanent error but got %s", err) + } + reconciledRun, err := clients.Pipeline.TektonV1().PipelineRuns(namespace).Get(testAssets.Ctx, prName, metav1.GetOptions{}) + if err != nil { + t.Fatalf("Somehow had error getting reconciled run out of fake client: %s", err) + } + + if reconciledRun.Status.CompletionTime == nil { + t.Errorf("Expected a CompletionTime on invalid PipelineRun but was nil") + } + + // The PipelineRun should be create run failed. + if reconciledRun.Status.GetCondition(apis.ConditionSucceeded).Reason != "CreateRunFailed" { + t.Errorf("Expected PipelineRun to be create run failed, but condition reason is %s", reconciledRun.Status.GetCondition(apis.ConditionSucceeded)) + } + } +} + +func TestHandleCustomRunCreationError(t *testing.T) { + prName := "customrun-creation-fails" + namespace := "default" + apiVersion := "example.dev/v1" + kind := "example" + pr := parse.MustParseV1PipelineRun(t, fmt.Sprintf(` +metadata: + name: %s + namespace: %s +spec: + pipelineSpec: + tasks: + - name: hello-world + taskSpec: + apiVersion: %s + kind: %s + spec: + foo: bar +`, prName, namespace, apiVersion, kind)) + customRunGK := schema.GroupKind{Group: apiVersion, Kind: kind} + + tcs := []struct { + name string + creationErr error + }{{ + name: "invalid", + creationErr: apierrors.NewInvalid(customRunGK, fmt.Sprintf("%s-hello-world", prName), field.ErrorList{}), + }, { + name: "bad request", + creationErr: apierrors.NewBadRequest("bad request"), + }} + for _, tc := range tcs { + d := test.Data{ + PipelineRuns: []*v1.PipelineRun{pr.DeepCopy()}, + } + testAssets, cancel := getPipelineRunController(t, d) + defer cancel() + c := testAssets.Controller + clients := testAssets.Clients + + // Create an error when the Pipeline client attempts to create CustomRuns + clients.Pipeline.PrependReactor("create", "customruns", func(action ktesting.Action) (bool, runtime.Object, error) { + return true, nil, tc.creationErr + }) + err := c.Reconciler.Reconcile(testAssets.Ctx, fmt.Sprintf("%s/%s", namespace, prName)) + if !controller.IsPermanentError(err) { + t.Errorf("expected permanent error but got %s", err) + } + reconciledRun, err := clients.Pipeline.TektonV1().PipelineRuns(namespace).Get(testAssets.Ctx, prName, metav1.GetOptions{}) + if err != nil { + t.Fatalf("Somehow had error getting reconciled run out of fake client: %s", err) + } + + if reconciledRun.Status.CompletionTime == nil { + t.Errorf("Expected a CompletionTime on invalid PipelineRun but was nil") + } + + // The PipelineRun should be create run failed. + if reconciledRun.Status.GetCondition(apis.ConditionSucceeded).Reason != "CreateRunFailed" { + t.Errorf("Expected PipelineRun to be create run failed, but condition reason is %s", reconciledRun.Status.GetCondition(apis.ConditionSucceeded)) + } + } +} + func TestReconcileWithTimeoutsOfCompletedPipelineRun(t *testing.T) { // TestReconcileWithTimeoutsOfCompletedPipelineRun runs "Reconcile" on a PipelineRun that has completed and // which has a passed timeout.