diff --git a/changelogs/unreleased/7102-Lyndon-Li b/changelogs/unreleased/7102-Lyndon-Li new file mode 100644 index 0000000000..4b5b81ffdc --- /dev/null +++ b/changelogs/unreleased/7102-Lyndon-Li @@ -0,0 +1 @@ +Fix issue #7068, due to a behavior of CSI external snapshotter, manipulations of VS and VSC may not be handled in the same order inside external snapshotter as the API is called. So add a protection finalizer to ensure the order \ No newline at end of file diff --git a/pkg/exposer/csi_snapshot.go b/pkg/exposer/csi_snapshot.go index 9fb25d70d7..9979c5fddb 100644 --- a/pkg/exposer/csi_snapshot.go +++ b/pkg/exposer/csi_snapshot.go @@ -119,8 +119,6 @@ func (e *csiSnapshotExposer) Expose(ctx context.Context, ownerObject corev1.Obje return errors.Wrap(err, "error to retain volume snapshot content") } - vsc = retained - curLog.WithField("vsc name", vsc.Name).WithField("retained", (retained != nil)).Info("Finished to retain VSC") defer func() { @@ -136,7 +134,14 @@ func (e *csiSnapshotExposer) Expose(ctx context.Context, ownerObject corev1.Obje curLog.WithField("vs name", volumeSnapshot.Name).Infof("VS is deleted in namespace %s", volumeSnapshot.Namespace) - err = csi.EnsureDeleteVSC(ctx, e.csiSnapshotClient, vsc, csiExposeParam.OperationTimeout) + err = csi.RemoveVSCProtect(ctx, e.csiSnapshotClient, vsc.Name, csiExposeParam.ExposeTimeout) + if err != nil { + return errors.Wrap(err, "error to remove protect from volume snapshot content") + } + + curLog.WithField("vsc name", vsc.Name).Infof("Removed protect from VSC") + + err = csi.EnsureDeleteVSC(ctx, e.csiSnapshotClient, vsc.Name, csiExposeParam.OperationTimeout) if err != nil { return errors.Wrap(err, "error to delete volume snapshot content") } diff --git a/pkg/util/csi/volume_snapshot.go b/pkg/util/csi/volume_snapshot.go index 1440ab6e11..a8ade7acd0 100644 --- a/pkg/util/csi/volume_snapshot.go +++ b/pkg/util/csi/volume_snapshot.go @@ -152,36 +152,53 @@ func EnsureDeleteVS(ctx context.Context, snapshotClient snapshotter.SnapshotV1In return nil } -// EnsureDeleteVSC asserts the existence of a VSC by name, deletes it and waits for its disappearance and returns errors on any failure -func EnsureDeleteVSC(ctx context.Context, snapshotClient snapshotter.SnapshotV1Interface, - vsc *snapshotv1api.VolumeSnapshotContent, timeout time.Duration) error { - _, err := patchVSC(ctx, snapshotClient, vsc, func(updated *snapshotv1api.VolumeSnapshotContent) { - updated.Finalizers = stringslice.Except(updated.Finalizers, volumeSnapshotContentProtectFinalizer) +func RemoveVSCProtect(ctx context.Context, snapshotClient snapshotter.SnapshotV1Interface, vscName string, timeout time.Duration) error { + err := wait.PollImmediate(waitInternal, timeout, func() (bool, error) { + vsc, err := snapshotClient.VolumeSnapshotContents().Get(ctx, vscName, metav1.GetOptions{}) + if err != nil { + return false, errors.Wrapf(err, "error to get VolumeSnapshotContent %s", vscName) + } + + vsc.Finalizers = stringslice.Except(vsc.Finalizers, volumeSnapshotContentProtectFinalizer) + + _, err = snapshotClient.VolumeSnapshotContents().Update(ctx, vsc, metav1.UpdateOptions{}) + if err == nil { + return true, nil + } + + if !apierrors.IsConflict(err) { + return false, errors.Wrapf(err, "error to update VolumeSnapshotContent %s", vscName) + } + + return false, nil }) - if err != nil { - return errors.Wrapf(err, "error to remove protect finalizer from vsc %s", vsc.Name) - } - err = snapshotClient.VolumeSnapshotContents().Delete(ctx, vsc.Name, metav1.DeleteOptions{}) + return err +} + +// EnsureDeleteVSC asserts the existence of a VSC by name, deletes it and waits for its disappearance and returns errors on any failure +func EnsureDeleteVSC(ctx context.Context, snapshotClient snapshotter.SnapshotV1Interface, + vscName string, timeout time.Duration) error { + err := snapshotClient.VolumeSnapshotContents().Delete(ctx, vscName, metav1.DeleteOptions{}) if err != nil && !apierrors.IsNotFound(err) { return errors.Wrap(err, "error to delete volume snapshot content") } err = wait.PollImmediate(waitInternal, timeout, func() (bool, error) { - _, err := snapshotClient.VolumeSnapshotContents().Get(ctx, vsc.Name, metav1.GetOptions{}) + _, err := snapshotClient.VolumeSnapshotContents().Get(ctx, vscName, metav1.GetOptions{}) if err != nil { if apierrors.IsNotFound(err) { return true, nil } - return false, errors.Wrapf(err, fmt.Sprintf("error to get VolumeSnapshotContent %s", vsc.Name)) + return false, errors.Wrapf(err, fmt.Sprintf("error to get VolumeSnapshotContent %s", vscName)) } return false, nil }) if err != nil { - return errors.Wrapf(err, "error to assure VolumeSnapshotContent is deleted, %s", vsc.Name) + return errors.Wrapf(err, "error to assure VolumeSnapshotContent is deleted, %s", vscName) } return nil diff --git a/pkg/util/csi/volume_snapshot_test.go b/pkg/util/csi/volume_snapshot_test.go index 9e4ecec36d..4fbac34f5c 100644 --- a/pkg/util/csi/volume_snapshot_test.go +++ b/pkg/util/csi/volume_snapshot_test.go @@ -34,6 +34,8 @@ import ( "github.com/vmware-tanzu/velero/pkg/util/stringptr" velerotest "github.com/vmware-tanzu/velero/pkg/test" + + apierrors "k8s.io/apimachinery/pkg/api/errors" ) type reactor struct { @@ -360,26 +362,16 @@ func TestEnsureDeleteVSC(t *testing.T) { name string clientObj []runtime.Object reactors []reactor - vscObj *snapshotv1api.VolumeSnapshotContent + vscName string err string }{ { - name: "remove finalizer fail", - vscObj: vscObj, - reactors: []reactor{ - { - verb: "patch", - resource: "volumesnapshotcontents", - reactorFunc: func(action clientTesting.Action) (handled bool, ret runtime.Object, err error) { - return true, nil, errors.New("fake-patch-error") - }, - }, - }, - err: "error to remove protect finalizer from vsc fake-vsc: error patching VSC: fake-patch-error", + name: "delete fail on VSC not found", + vscName: "fake-vsc", }, { - name: "delete fail", - vscObj: vscObj, + name: "delete fail on others", + vscName: "fake-vsc", clientObj: []runtime.Object{vscObj}, reactors: []reactor{ { @@ -394,7 +386,7 @@ func TestEnsureDeleteVSC(t *testing.T) { }, { name: "wait fail", - vscObj: vscObj, + vscName: "fake-vsc", clientObj: []runtime.Object{vscObj}, reactors: []reactor{ { @@ -409,7 +401,7 @@ func TestEnsureDeleteVSC(t *testing.T) { }, { name: "success", - vscObj: vscObj, + vscName: "fake-vsc", clientObj: []runtime.Object{vscObj}, }, } @@ -422,8 +414,8 @@ func TestEnsureDeleteVSC(t *testing.T) { fakeSnapshotClient.Fake.PrependReactor(reactor.verb, reactor.resource, reactor.reactorFunc) } - err := EnsureDeleteVSC(context.Background(), fakeSnapshotClient.SnapshotV1(), test.vscObj, time.Millisecond) - if err != nil { + err := EnsureDeleteVSC(context.Background(), fakeSnapshotClient.SnapshotV1(), test.vscName, time.Millisecond) + if test.err != "" { assert.EqualError(t, err, test.err) } else { assert.NoError(t, err) @@ -659,3 +651,98 @@ func TestRetainVSC(t *testing.T) { }) } } + +func TestRemoveVSCProtect(t *testing.T) { + vscObj := &snapshotv1api.VolumeSnapshotContent{ + ObjectMeta: metav1.ObjectMeta{ + Name: "fake-vsc", + Finalizers: []string{volumeSnapshotContentProtectFinalizer}, + }, + } + + tests := []struct { + name string + clientObj []runtime.Object + reactors []reactor + vsc string + updated *snapshotv1api.VolumeSnapshotContent + timeout time.Duration + err string + }{ + { + name: "get vsc error", + vsc: "fake-vsc", + err: "error to get VolumeSnapshotContent fake-vsc: volumesnapshotcontents.snapshot.storage.k8s.io \"fake-vsc\" not found", + }, + { + name: "update vsc fail", + vsc: "fake-vsc", + clientObj: []runtime.Object{vscObj}, + reactors: []reactor{ + { + verb: "update", + resource: "volumesnapshotcontents", + reactorFunc: func(action clientTesting.Action) (handled bool, ret runtime.Object, err error) { + return true, nil, errors.New("fake-update-error") + }, + }, + }, + err: "error to update VolumeSnapshotContent fake-vsc: fake-update-error", + }, + { + name: "update vsc timeout", + vsc: "fake-vsc", + clientObj: []runtime.Object{vscObj}, + reactors: []reactor{ + { + verb: "update", + resource: "volumesnapshotcontents", + reactorFunc: func(action clientTesting.Action) (handled bool, ret runtime.Object, err error) { + return true, nil, &apierrors.StatusError{ErrStatus: metav1.Status{ + Reason: metav1.StatusReasonConflict, + }} + }, + }, + }, + timeout: time.Second, + err: "timed out waiting for the condition", + }, + { + name: "succeed", + vsc: "fake-vsc", + clientObj: []runtime.Object{vscObj}, + timeout: time.Second, + updated: &snapshotv1api.VolumeSnapshotContent{ + ObjectMeta: metav1.ObjectMeta{ + Name: "fake-vsc", + Finalizers: []string{}, + }, + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + fakeSnapshotClient := snapshotFake.NewSimpleClientset(test.clientObj...) + + for _, reactor := range test.reactors { + fakeSnapshotClient.Fake.PrependReactor(reactor.verb, reactor.resource, reactor.reactorFunc) + } + + err := RemoveVSCProtect(context.Background(), fakeSnapshotClient.SnapshotV1(), test.vsc, test.timeout) + + if len(test.err) == 0 { + assert.NoError(t, err) + } else { + assert.EqualError(t, err, test.err) + } + + if test.updated != nil { + updated, err := fakeSnapshotClient.SnapshotV1().VolumeSnapshotContents().Get(context.Background(), test.vsc, metav1.GetOptions{}) + assert.NoError(t, err) + + assert.Equal(t, test.updated.Finalizers, updated.Finalizers) + } + }) + } +}