From 8106a6ed1571401cf937dc204cefd78573745f3b Mon Sep 17 00:00:00 2001 From: Scott Seago Date: Wed, 13 Sep 2023 13:24:09 -0400 Subject: [PATCH] issue #6807: Retry failed create when using generateName When creating resources with generateName, apimachinery does not guarantee uniqueness when it appends the random suffix to the generateName stub, so if it fails with already exists error, we need to retry. Signed-off-by: Scott Seago --- pkg/cmd/cli/backup/delete.go | 6 +++++- pkg/cmd/cli/serverstatus/server_status.go | 6 +++++- pkg/controller/gc_controller.go | 5 ++++- pkg/podvolume/backupper.go | 7 ++++++- pkg/podvolume/restorer.go | 6 +++++- pkg/repository/ensurer.go | 6 +++++- pkg/restore/dataupload_retrieve_action.go | 6 +++++- 7 files changed, 35 insertions(+), 7 deletions(-) diff --git a/pkg/cmd/cli/backup/delete.go b/pkg/cmd/cli/backup/delete.go index 5d235bd72f1..149a2f5d802 100644 --- a/pkg/cmd/cli/backup/delete.go +++ b/pkg/cmd/cli/backup/delete.go @@ -22,9 +22,11 @@ import ( "github.com/pkg/errors" "github.com/spf13/cobra" + apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" kubeerrs "k8s.io/apimachinery/pkg/util/errors" + "k8s.io/client-go/util/retry" controllerclient "sigs.k8s.io/controller-runtime/pkg/client" velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" @@ -124,7 +126,9 @@ func Run(o *cli.DeleteOptions) error { ObjectMeta(builder.WithLabels(velerov1api.BackupNameLabel, label.GetValidName(b.Name), velerov1api.BackupUIDLabel, string(b.UID)), builder.WithGenerateName(b.Name+"-")).Result() - if err := o.Client.Create(context.TODO(), deleteRequest, &controllerclient.CreateOptions{}); err != nil { + if err := retry.OnError(retry.DefaultRetry, apierrors.IsAlreadyExists, func() error { + return o.Client.Create(context.TODO(), deleteRequest, &controllerclient.CreateOptions{}) + }); err != nil { errs = append(errs, err) continue } diff --git a/pkg/cmd/cli/serverstatus/server_status.go b/pkg/cmd/cli/serverstatus/server_status.go index 5bf99aff336..15c28b08a5f 100644 --- a/pkg/cmd/cli/serverstatus/server_status.go +++ b/pkg/cmd/cli/serverstatus/server_status.go @@ -21,7 +21,9 @@ import ( "time" "github.com/pkg/errors" + apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/util/retry" kbclient "sigs.k8s.io/controller-runtime/pkg/client" velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" @@ -40,7 +42,9 @@ type DefaultServerStatusGetter struct { func (g *DefaultServerStatusGetter) GetServerStatus(kbClient kbclient.Client) (*velerov1api.ServerStatusRequest, error) { created := builder.ForServerStatusRequest(g.Namespace, "", "0").ObjectMeta(builder.WithGenerateName("velero-cli-")).Result() - if err := kbClient.Create(context.Background(), created, &kbclient.CreateOptions{}); err != nil { + if err := retry.OnError(retry.DefaultRetry, apierrors.IsAlreadyExists, func() error { + return kbClient.Create(context.Background(), created, &kbclient.CreateOptions{}) + }); err != nil { return nil, errors.WithStack(err) } diff --git a/pkg/controller/gc_controller.go b/pkg/controller/gc_controller.go index 3d1fe48c743..5d612b3a5e3 100644 --- a/pkg/controller/gc_controller.go +++ b/pkg/controller/gc_controller.go @@ -23,6 +23,7 @@ import ( "github.com/pkg/errors" "github.com/sirupsen/logrus" apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/client-go/util/retry" clocks "k8s.io/utils/clock" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/builder" @@ -187,7 +188,9 @@ func (c *gcReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Re log.Info("Creating a new deletion request") ndbr := pkgbackup.NewDeleteBackupRequest(backup.Name, string(backup.UID)) ndbr.SetNamespace(backup.Namespace) - if err := c.Create(ctx, ndbr); err != nil { + if err := retry.OnError(retry.DefaultRetry, apierrors.IsAlreadyExists, func() error { + return c.Create(ctx, ndbr) + }); err != nil { log.WithError(err).Error("error creating DeleteBackupRequests") return ctrl.Result{}, errors.Wrap(err, "error creating DeleteBackupRequest") } diff --git a/pkg/podvolume/backupper.go b/pkg/podvolume/backupper.go index 57ab0c030f1..10b5a4af609 100644 --- a/pkg/podvolume/backupper.go +++ b/pkg/podvolume/backupper.go @@ -24,10 +24,12 @@ import ( "github.com/pkg/errors" "github.com/sirupsen/logrus" corev1api "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/sets" corev1client "k8s.io/client-go/kubernetes/typed/core/v1" "k8s.io/client-go/tools/cache" + "k8s.io/client-go/util/retry" "github.com/vmware-tanzu/velero/internal/resourcepolicies" velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" @@ -284,7 +286,10 @@ func (b *backupper) BackupPodVolumes(backup *velerov1api.Backup, pod *corev1api. } volumeBackup := newPodVolumeBackup(backup, pod, volume, repo.Spec.ResticIdentifier, b.uploaderType, pvc) - if _, err = b.veleroClient.VeleroV1().PodVolumeBackups(volumeBackup.Namespace).Create(context.TODO(), volumeBackup, metav1.CreateOptions{}); err != nil { + if err := retry.OnError(retry.DefaultRetry, apierrors.IsAlreadyExists, func() error { + _, err := b.veleroClient.VeleroV1().PodVolumeBackups(volumeBackup.Namespace).Create(context.TODO(), volumeBackup, metav1.CreateOptions{}) + return err + }); err != nil { errs = append(errs, err) continue } diff --git a/pkg/podvolume/restorer.go b/pkg/podvolume/restorer.go index 0011f3d473d..a2afdb5f13a 100644 --- a/pkg/podvolume/restorer.go +++ b/pkg/podvolume/restorer.go @@ -24,11 +24,13 @@ import ( "github.com/pkg/errors" "github.com/sirupsen/logrus" corev1api "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/kubernetes" corev1client "k8s.io/client-go/kubernetes/typed/core/v1" "k8s.io/client-go/tools/cache" + "k8s.io/client-go/util/retry" velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" clientset "github.com/vmware-tanzu/velero/pkg/generated/clientset/versioned" @@ -172,7 +174,9 @@ func (r *restorer) RestorePodVolumes(data RestoreData) []error { volumeRestore := newPodVolumeRestore(data.Restore, data.Pod, data.BackupLocation, volume, backupInfo.snapshotID, repo.Spec.ResticIdentifier, backupInfo.uploaderType, data.SourceNamespace, pvc) - if err := errorOnly(r.veleroClient.VeleroV1().PodVolumeRestores(volumeRestore.Namespace).Create(context.TODO(), volumeRestore, metav1.CreateOptions{})); err != nil { + if err := retry.OnError(retry.DefaultRetry, apierrors.IsAlreadyExists, func() error { + return errorOnly(r.veleroClient.VeleroV1().PodVolumeRestores(volumeRestore.Namespace).Create(context.TODO(), volumeRestore, metav1.CreateOptions{})) + }); err != nil { errs = append(errs, errors.WithStack(err)) continue } diff --git a/pkg/repository/ensurer.go b/pkg/repository/ensurer.go index 885363328ce..7ab2d5fa3a8 100644 --- a/pkg/repository/ensurer.go +++ b/pkg/repository/ensurer.go @@ -23,7 +23,9 @@ import ( "github.com/pkg/errors" "github.com/sirupsen/logrus" + apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/util/retry" "sigs.k8s.io/controller-runtime/pkg/client" @@ -107,7 +109,9 @@ func (r *Ensurer) repoLock(key BackupRepositoryKey) *sync.Mutex { func (r *Ensurer) createBackupRepositoryAndWait(ctx context.Context, namespace string, backupRepoKey BackupRepositoryKey) (*velerov1api.BackupRepository, error) { toCreate := NewBackupRepository(namespace, backupRepoKey) - if err := r.repoClient.Create(ctx, toCreate, &client.CreateOptions{}); err != nil { + if err := retry.OnError(retry.DefaultRetry, apierrors.IsAlreadyExists, func() error { + return r.repoClient.Create(ctx, toCreate, &client.CreateOptions{}) + }); err != nil { return nil, errors.Wrap(err, "unable to create backup repository resource") } diff --git a/pkg/restore/dataupload_retrieve_action.go b/pkg/restore/dataupload_retrieve_action.go index c345e908ff0..5fe5e1e19a4 100644 --- a/pkg/restore/dataupload_retrieve_action.go +++ b/pkg/restore/dataupload_retrieve_action.go @@ -23,9 +23,11 @@ import ( "github.com/pkg/errors" "github.com/sirupsen/logrus" corev1api "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/util/retry" "sigs.k8s.io/controller-runtime/pkg/client" velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" @@ -104,7 +106,9 @@ func (d *DataUploadRetrieveAction) Execute(input *velero.RestoreItemActionExecut }, } - err = d.client.Create(context.Background(), &cm, &client.CreateOptions{}) + err = retry.OnError(retry.DefaultRetry, apierrors.IsAlreadyExists, func() error { + return d.client.Create(context.Background(), &cm, &client.CreateOptions{}) + }) if err != nil { d.logger.Errorf("fail to create DataUploadResult ConfigMap %s/%s: %s", cm.Namespace, cm.Name, err.Error()) return nil, errors.Wrap(err, "fail to create DataUploadResult ConfigMap")