Skip to content

Commit

Permalink
Fail PipelineRun when it can't create Runs
Browse files Browse the repository at this point in the history
Prior to this commit, if a PipelineRun couldn't create TaskRuns, the resulting
error would be requeued and the PipelineRun controller would continue to retry.
The PipelineRun status would represent the outcome of a previous reconcile loop,
which sometimes resulted in PipelineRun statuses that didn't match what was occuring
on the cluster. This commit marks a PipelineRun as failed and returns a permanent error
if TaskRun or CustomRun creation fails for a non-retryable reason.
  • Loading branch information
lbernick authored and tekton-robot committed Jul 10, 2023
1 parent 29ddf85 commit 937c8c0
Show file tree
Hide file tree
Showing 2 changed files with 150 additions and 13 deletions.
24 changes: 17 additions & 7 deletions pkg/reconciler/pipelinerun/pipelinerun.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ import (
"go.opentelemetry.io/otel/trace"
"go.uber.org/zap"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
k8slabels "k8s.io/apimachinery/pkg/labels"
Expand Down Expand Up @@ -769,13 +770,6 @@ func (c *Reconciler) runNextSchedulableTask(ctx context.Context, pr *v1.Pipeline
}
}

defer func() {
// If it is a permanent error, set pipelinerun to a failure state directly to avoid unnecessary retries.
if err != nil && controller.IsPermanentError(err) {
pr.Status.MarkFailed(ReasonCreateRunFailed, err.Error())
}
}()

if rpt.IsCustomTask() {
rpt.CustomRuns, err = c.createCustomRuns(ctx, rpt, pr)
if err != nil {
Expand Down Expand Up @@ -821,6 +815,7 @@ func (c *Reconciler) createTaskRuns(ctx context.Context, rpt *resources.Resolved
}
taskRun, err := c.createTaskRun(ctx, taskRunName, params, rpt, pr)
if err != nil {
err := c.handleRunCreationError(ctx, pr, err)
return nil, err
}
taskRuns = append(taskRuns, taskRun)
Expand Down Expand Up @@ -886,6 +881,20 @@ func (c *Reconciler) createTaskRun(ctx context.Context, taskRunName string, para
return c.PipelineClientSet.TektonV1().TaskRuns(pr.Namespace).Create(ctx, tr, metav1.CreateOptions{})
}

// handleRunCreationError marks the PipelineRun as failed and returns a permanent error if the run creation error is not retryable
func (c *Reconciler) handleRunCreationError(ctx context.Context, pr *v1.PipelineRun, err error) error {
if controller.IsPermanentError(err) {
pr.Status.MarkFailed(ReasonCreateRunFailed, err.Error())
return err
}
// This is not a complete list of permanent errors. Any permanent error with TaskRun/CustomRun creation can be added here.
if apierrors.IsInvalid(err) || apierrors.IsBadRequest(err) {
pr.Status.MarkFailed(ReasonCreateRunFailed, err.Error())
return controller.NewPermanentError(err)
}
return err
}

func (c *Reconciler) createCustomRuns(ctx context.Context, rpt *resources.ResolvedPipelineTask, pr *v1.PipelineRun) ([]*v1beta1.CustomRun, error) {
var customRuns []*v1beta1.CustomRun
ctx, span := c.tracerProvider.Tracer(TracerName).Start(ctx, "createCustomRuns")
Expand All @@ -902,6 +911,7 @@ func (c *Reconciler) createCustomRuns(ctx context.Context, rpt *resources.Resolv
}
customRun, err := c.createCustomRun(ctx, customRunName, params, rpt, pr)
if err != nil {
err := c.handleRunCreationError(ctx, pr, err)
return nil, err
}
customRuns = append(customRuns, customRun)
Expand Down
139 changes: 133 additions & 6 deletions pkg/reconciler/pipelinerun/pipelinerun_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,13 @@ import (
"gomodules.xyz/jsonpatch/v2"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/validation/field"
fakek8s "k8s.io/client-go/kubernetes/fake"
ktesting "k8s.io/client-go/testing"
"k8s.io/client-go/tools/record"
Expand Down Expand Up @@ -6600,7 +6603,6 @@ func (prt PipelineRunTest) reconcileRun(namespace, pipelineRunName string, wantE
// Check generated events match what's expected
if len(wantEvents) > 0 {
if err := k8sevent.CheckEventsOrdered(prt.Test, prt.TestAssets.Recorder.Events, pipelineRunName, wantEvents); err != nil {
prt.Test.Fatalf("falal added: %s:", err.Error())
prt.Test.Errorf(err.Error())
}
}
Expand Down Expand Up @@ -13113,12 +13115,12 @@ spec:
defer prt.Cancel()

wantEvents := []string{
"Normal Started",
"Warning TaskRunsCreationFailed",
"error creating TaskRuns called \\[test-pipeline-run-with-create-run-failed-hello-world]\\ for PipelineTask hello-world from PipelineRun test-pipeline-run-with-create-run-failed: expected workspace \"source\" to be provided by pipelinerun for pipeline task \"hello-world\"",
"error creating TaskRuns called \\[test-pipeline-run-with-create-run-failed-hello-world]\\ for PipelineTask hello-world from PipelineRun test-pipeline-run-with-create-run-failed: expected workspace \"source\" to be provided by pipelinerun for pipeline task \"hello-world\"",
"Normal Started ",
"Warning TaskRunsCreationFailed Failed to create TaskRuns [\"test-pipeline-run-with-create-run-failed-hello-world\"]: expected workspace \"source\" to be provided by pipelinerun for pipeline task \"hello-world\"",
"Warning Failed expected workspace \"source\" to be provided by pipelinerun for pipeline task \"hello-world\"",
"Warning InternalError 1 error occurred:\n\t* error creating TaskRuns called [test-pipeline-run-with-create-run-failed-hello-world] for PipelineTask hello-world from PipelineRun test-pipeline-run-with-create-run-failed: expected workspace \"source\" to be provided by pipelinerun for pipeline task \"hello-world\"\n\n",
}
reconciledRun, _ := prt.reconcileRun("foo", "test-pipeline-run-with-create-run-failed", wantEvents, true)
reconciledRun, _ := prt.reconcileRun("foo", "test-pipeline-run-with-create-run-failed", wantEvents, true /* permanentError */)

if reconciledRun.Status.CompletionTime == nil {
t.Errorf("Expected a CompletionTime on invalid PipelineRun but was nil")
Expand All @@ -13130,6 +13132,131 @@ spec:
}
}

func TestHandleTaskRunCreationError(t *testing.T) {
prName := "taskrun-creation-fails"
namespace := "default"
pr := parse.MustParseV1PipelineRun(t, fmt.Sprintf(`
metadata:
name: %s
namespace: %s
spec:
pipelineSpec:
tasks:
- name: hello-world
taskSpec:
steps:
- image: busybox
script: echo hello
`, prName, namespace))
taskRunGK := schema.GroupKind{Group: "tekton.dev/v1", Kind: "taskrun"}

tcs := []struct {
name string
creationErr error
}{{
name: "invalid",
creationErr: apierrors.NewInvalid(taskRunGK, fmt.Sprintf("%s-hello-world", prName), field.ErrorList{}),
}, {
name: "bad request",
creationErr: apierrors.NewBadRequest("bad request"),
}}
for _, tc := range tcs {
d := test.Data{
PipelineRuns: []*v1.PipelineRun{pr.DeepCopy()},
}
testAssets, cancel := getPipelineRunController(t, d)
defer cancel()
c := testAssets.Controller
clients := testAssets.Clients

// Create an error when the Pipeline client attempts to create TaskRuns
clients.Pipeline.PrependReactor("create", "taskruns", func(action ktesting.Action) (bool, runtime.Object, error) {
return true, nil, tc.creationErr
})
err := c.Reconciler.Reconcile(testAssets.Ctx, fmt.Sprintf("%s/%s", namespace, prName))
if !controller.IsPermanentError(err) {
t.Errorf("expected permanent error but got %s", err)
}
reconciledRun, err := clients.Pipeline.TektonV1().PipelineRuns(namespace).Get(testAssets.Ctx, prName, metav1.GetOptions{})
if err != nil {
t.Fatalf("Somehow had error getting reconciled run out of fake client: %s", err)
}

if reconciledRun.Status.CompletionTime == nil {
t.Errorf("Expected a CompletionTime on invalid PipelineRun but was nil")
}

// The PipelineRun should be create run failed.
if reconciledRun.Status.GetCondition(apis.ConditionSucceeded).Reason != "CreateRunFailed" {
t.Errorf("Expected PipelineRun to be create run failed, but condition reason is %s", reconciledRun.Status.GetCondition(apis.ConditionSucceeded))
}
}
}

func TestHandleCustomRunCreationError(t *testing.T) {
prName := "customrun-creation-fails"
namespace := "default"
apiVersion := "example.dev/v1"
kind := "example"
pr := parse.MustParseV1PipelineRun(t, fmt.Sprintf(`
metadata:
name: %s
namespace: %s
spec:
pipelineSpec:
tasks:
- name: hello-world
taskSpec:
apiVersion: %s
kind: %s
spec:
foo: bar
`, prName, namespace, apiVersion, kind))
customRunGK := schema.GroupKind{Group: apiVersion, Kind: kind}

tcs := []struct {
name string
creationErr error
}{{
name: "invalid",
creationErr: apierrors.NewInvalid(customRunGK, fmt.Sprintf("%s-hello-world", prName), field.ErrorList{}),
}, {
name: "bad request",
creationErr: apierrors.NewBadRequest("bad request"),
}}
for _, tc := range tcs {
d := test.Data{
PipelineRuns: []*v1.PipelineRun{pr.DeepCopy()},
}
testAssets, cancel := getPipelineRunController(t, d)
defer cancel()
c := testAssets.Controller
clients := testAssets.Clients

// Create an error when the Pipeline client attempts to create CustomRuns
clients.Pipeline.PrependReactor("create", "customruns", func(action ktesting.Action) (bool, runtime.Object, error) {
return true, nil, tc.creationErr
})
err := c.Reconciler.Reconcile(testAssets.Ctx, fmt.Sprintf("%s/%s", namespace, prName))
if !controller.IsPermanentError(err) {
t.Errorf("expected permanent error but got %s", err)
}
reconciledRun, err := clients.Pipeline.TektonV1().PipelineRuns(namespace).Get(testAssets.Ctx, prName, metav1.GetOptions{})
if err != nil {
t.Fatalf("Somehow had error getting reconciled run out of fake client: %s", err)
}

if reconciledRun.Status.CompletionTime == nil {
t.Errorf("Expected a CompletionTime on invalid PipelineRun but was nil")
}

// The PipelineRun should be create run failed.
if reconciledRun.Status.GetCondition(apis.ConditionSucceeded).Reason != "CreateRunFailed" {
t.Errorf("Expected PipelineRun to be create run failed, but condition reason is %s", reconciledRun.Status.GetCondition(apis.ConditionSucceeded))
}
}
}

func TestReconcileWithTimeoutsOfCompletedPipelineRun(t *testing.T) {
// TestReconcileWithTimeoutsOfCompletedPipelineRun runs "Reconcile" on a PipelineRun that has completed and
// which has a passed timeout.
Expand Down

0 comments on commit 937c8c0

Please sign in to comment.