diff --git a/changelogs/unreleased/7135-blackpiglet b/changelogs/unreleased/7135-blackpiglet new file mode 100644 index 00000000000..ccd5d0690a1 --- /dev/null +++ b/changelogs/unreleased/7135-blackpiglet @@ -0,0 +1 @@ +Use VolumeInfo to help restore the PV. \ No newline at end of file diff --git a/design/pv_backup_info.md b/design/pv_backup_info.md index 107305fe5b4..90cd998cb22 100644 --- a/design/pv_backup_info.md +++ b/design/pv_backup_info.md @@ -125,7 +125,7 @@ type BackupStore interface { ### How the VolumeInfo array is used. #### Generate the PVC backed-up information summary -The downstream tools can use this VolumeInfo array to format and display their volume information. This is in the scope of this feature. +The downstream tools can use this VolumeInfo array to format and display their volume information. This is not in the scope of this feature. #### Retrieve volume backed-up information for `velero backup describe` command The `velero backup describe` can also use this VolumeInfo array structure to display the volume information. The snapshot data mover volume should use this structure at first, then the Velero native snapshot, CSI snapshot, and PodVolumeBackup can also use this structure. The detailed implementation is also not in this feature's scope. diff --git a/pkg/controller/restore_controller.go b/pkg/controller/restore_controller.go index f6b9b39d966..af680e7624e 100644 --- a/pkg/controller/restore_controller.go +++ b/pkg/controller/restore_controller.go @@ -56,6 +56,7 @@ import ( kubeutil "github.com/vmware-tanzu/velero/pkg/util/kube" "github.com/vmware-tanzu/velero/pkg/util/logging" "github.com/vmware-tanzu/velero/pkg/util/results" + "github.com/vmware-tanzu/velero/pkg/volume" ) // nonRestorableResources is an exclusion list for the restoration process. 
Any resources @@ -520,6 +521,17 @@ func (r *restoreReconciler) runValidatedRestore(restore *api.Restore, info backu return errors.Wrap(err, "fail to fetch CSI VolumeSnapshots metadata") } + volumeInfos, err := backupStore.GetBackupVolumeInfos(restore.Spec.BackupName) + if err != nil { + restoreLog.WithError(err).Infof("Backup %s doesn't have a volumeinfos metadata file.", restore.Spec.BackupName) + } + backupVolumeInfoMap := make(map[string]volume.VolumeInfo) + if volumeInfos != nil { + for _, volumeInfo := range volumeInfos.VolumeInfos { + backupVolumeInfoMap[volumeInfo.PVName] = volumeInfo + } + } + restoreLog.Info("starting restore") var podVolumeBackups []*api.PodVolumeBackup @@ -537,6 +547,7 @@ func (r *restoreReconciler) runValidatedRestore(restore *api.Restore, info backu ResourceModifiers: resourceModifiers, DisableInformerCache: r.disableInformerCache, CSIVolumeSnapshots: csiVolumeSnapshots, + VolumeInfoMap: backupVolumeInfoMap, } restoreWarnings, restoreErrors := r.restorer.RestoreWithResolvers(restoreReq, actionsResolver, pluginManager) diff --git a/pkg/persistence/mocks/backup_store.go b/pkg/persistence/mocks/backup_store.go index 1bcdf865c46..b25e611b81e 100644 --- a/pkg/persistence/mocks/backup_store.go +++ b/pkg/persistence/mocks/backup_store.go @@ -314,6 +314,29 @@ func (_m *BackupStore) GetRestoreItemOperations(name string) ([]*itemoperation.R return r0, r1 } +// GetBackupVolumeInfos provides a mock function with given fields: name +func (_m *BackupStore) GetBackupVolumeInfos(name string) (*volume.VolumeInfos, error) { + ret := _m.Called(name) + + var r0 *volume.VolumeInfos + if rf, ok := ret.Get(0).(func(string) *volume.VolumeInfos); ok { + r0 = rf(name) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*volume.VolumeInfos) + } + } + + var r1 error + if rf, ok := ret.Get(1).(func(string) error); ok { + r1 = rf(name) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + // IsValid provides a mock function with given fields: func (_m *BackupStore) IsValid() error { ret := _m.Called() diff --git a/pkg/persistence/object_store.go b/pkg/persistence/object_store.go index 48a48dbf170..4ba61d392fb 100644 --- a/pkg/persistence/object_store.go +++ b/pkg/persistence/object_store.go @@ -73,6 +73,7 @@ type BackupStore interface { GetCSIVolumeSnapshots(name string) ([]*snapshotv1api.VolumeSnapshot, error) GetCSIVolumeSnapshotContents(name string) ([]*snapshotv1api.VolumeSnapshotContent, error) GetCSIVolumeSnapshotClasses(name string) ([]*snapshotv1api.VolumeSnapshotClass, error) + GetBackupVolumeInfos(name string) (*volume.VolumeInfos, error) // BackupExists checks if the backup metadata file exists in object storage.
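For context, a minimal, self-contained sketch of how the new GetBackupVolumeInfos mock method could be primed in a test. The testify wiring mirrors the other generated BackupStore mock methods, but the package placement, import alias, backup name, and VolumeInfo values below are illustrative assumptions only, not part of this change.

package restore_test // hypothetical placement, for illustration only

import (
	"testing"

	"github.com/stretchr/testify/mock"

	persistencemocks "github.com/vmware-tanzu/velero/pkg/persistence/mocks"
	"github.com/vmware-tanzu/velero/pkg/volume"
)

// TestGetBackupVolumeInfosMockWiring primes the new mock method and reads the
// canned VolumeInfos back; the backup and PV names are invented.
func TestGetBackupVolumeInfosMockWiring(t *testing.T) {
	backupStore := new(persistencemocks.BackupStore)
	backupStore.On("GetBackupVolumeInfos", mock.Anything).Return(
		&volume.VolumeInfos{
			VolumeInfos: []volume.VolumeInfo{
				{PVName: "pv-1", BackupMethod: volume.CSISnapshot},
			},
		},
		nil,
	)

	infos, err := backupStore.GetBackupVolumeInfos("backup-1")
	if err != nil || infos == nil || len(infos.VolumeInfos) != 1 {
		t.Fatalf("unexpected mock result: %+v, %v", infos, err)
	}
}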
BackupExists(bucket, backupName string) (bool, error) diff --git a/pkg/restore/request.go b/pkg/restore/request.go index 2a267a5ffcf..e0fe44166c6 100644 --- a/pkg/restore/request.go +++ b/pkg/restore/request.go @@ -62,6 +62,7 @@ type Request struct { ResourceModifiers *resourcemodifiers.ResourceModifiers DisableInformerCache bool CSIVolumeSnapshots []*snapshotv1api.VolumeSnapshot + VolumeInfoMap map[string]volume.VolumeInfo } type restoredItemStatus struct { diff --git a/pkg/restore/restore.go b/pkg/restore/restore.go index ea0af47c97e..32e09af67dd 100644 --- a/pkg/restore/restore.go +++ b/pkg/restore/restore.go @@ -80,6 +80,14 @@ var resourceMustHave = []string{ "datauploads.velero.io", } +const ( + pvHasNativeSnapshot = iota + 1 + pvHasPodVolumeBackup + pvHasCSIVolumeSnapshot + pvHasReclaimPolicyAsDelete + pvHasReclaimPolicyAsRetain +) + type VolumeSnapshotterGetter interface { GetVolumeSnapshotter(name string) (vsv1.VolumeSnapshotter, error) } @@ -325,6 +333,7 @@ func (kr *kubernetesRestorer) RestoreWithResolvers( resourceModifiers: req.ResourceModifiers, disableInformerCache: req.DisableInformerCache, featureVerifier: kr.featureVerifier, + volumeInfoMap: req.VolumeInfoMap, } return restoreCtx.execute() @@ -377,6 +386,7 @@ type restoreContext struct { resourceModifiers *resourcemodifiers.ResourceModifiers disableInformerCache bool featureVerifier features.Verifier + volumeInfoMap map[string]volume.VolumeInfo } type resourceClientKey struct { @@ -1097,15 +1107,17 @@ func (ctx *restoreContext) restoreItem(obj *unstructured.Unstructured, groupReso itemExists := false resourceID := getResourceID(groupResource, namespace, obj.GetName()) + restoreLogger := ctx.log.WithFields(logrus.Fields{ + "namespace": obj.GetNamespace(), + "name": obj.GetName(), + "groupResource": groupResource.String(), + }) + // Check if group/resource should be restored. We need to do this here since // this method may be getting called for an additional item which is a group/resource // that's excluded. if !ctx.resourceIncludesExcludes.ShouldInclude(groupResource.String()) { - ctx.log.WithFields(logrus.Fields{ - "namespace": obj.GetNamespace(), - "name": obj.GetName(), - "groupResource": groupResource.String(), - }).Info("Not restoring item because resource is excluded") + restoreLogger.Info("Not restoring item because resource is excluded") return warnings, errs, itemExists } @@ -1117,11 +1129,7 @@ func (ctx *restoreContext) restoreItem(obj *unstructured.Unstructured, groupReso // to check the *original* namespace, not the remapped one if it's been remapped. 
if namespace != "" { if !ctx.namespaceIncludesExcludes.ShouldInclude(obj.GetNamespace()) && !ctx.resourceMustHave.Has(groupResource.String()) { - ctx.log.WithFields(logrus.Fields{ - "namespace": obj.GetNamespace(), - "name": obj.GetName(), - "groupResource": groupResource.String(), - }).Info("Not restoring item because namespace is excluded") + restoreLogger.Info("Not restoring item because namespace is excluded") return warnings, errs, itemExists } @@ -1145,11 +1153,7 @@ func (ctx *restoreContext) restoreItem(obj *unstructured.Unstructured, groupReso } } else { if boolptr.IsSetToFalse(ctx.restore.Spec.IncludeClusterResources) { - ctx.log.WithFields(logrus.Fields{ - "namespace": obj.GetNamespace(), - "name": obj.GetName(), - "groupResource": groupResource.String(), - }).Info("Not restoring item because it's cluster-scoped") + restoreLogger.Info("Not restoring item because it's cluster-scoped") return warnings, errs, itemExists } } @@ -1213,123 +1217,69 @@ func (ctx *restoreContext) restoreItem(obj *unstructured.Unstructured, groupReso } if groupResource == kuberesource.PersistentVolumes { - switch { - case hasSnapshot(name, ctx.volumeSnapshots): - oldName := obj.GetName() - shouldRenamePV, err := shouldRenamePV(ctx, obj, resourceClient) - if err != nil { - errs.Add(namespace, err) - return warnings, errs, itemExists + pvCondition := 0 + if volumeInfo, ok := ctx.volumeInfoMap[obj.GetName()]; ok { + ctx.log.Infof("Found VolumeInfo for PV %s.", obj.GetName()) + + switch volumeInfo.BackupMethod { + case volume.NativeSnapshot: + pvCondition = pvHasNativeSnapshot + case volume.PodVolumeBackup: + pvCondition = pvHasPodVolumeBackup + // Both the CSI snapshot and snapshot data move fall into this case. + case volume.CSISnapshot: + pvCondition = pvHasCSIVolumeSnapshot + default: + if hasDeleteReclaimPolicy(obj.Object) { + pvCondition = pvHasReclaimPolicyAsDelete + } else { + pvCondition = pvHasReclaimPolicyAsRetain + } } + } else { + // TODO: VolumeInfo is adopted and old logic is deprecated in v1.13. + // Remove the old logic in v1.15. + ctx.log.Infof("Cannot find VolumeInfo for PV %s.", obj.GetName()) + + switch { + case hasSnapshot(name, ctx.volumeSnapshots): + pvCondition = pvHasNativeSnapshot + case hasPodVolumeBackup(obj, ctx): + pvCondition = pvHasPodVolumeBackup + case hasCSIVolumeSnapshot(ctx, obj): + pvCondition = pvHasCSIVolumeSnapshot + case hasSnapshotDataUpload(ctx, obj): + pvCondition = pvHasCSIVolumeSnapshot + case hasDeleteReclaimPolicy(obj.Object): + pvCondition = pvHasReclaimPolicyAsDelete + default: + pvCondition = pvHasReclaimPolicyAsRetain - // Check to see if the claimRef.namespace field needs to be remapped, - // and do so if necessary.
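To make the VolumeInfo-based classification concrete, here is a small illustrative sketch (invented PV names, not part of this change) of map entries and the pvCondition branch each BackupMethod selects; a PV with no entry falls back to the legacy helpers (hasSnapshot, hasPodVolumeBackup, hasCSIVolumeSnapshot, hasSnapshotDataUpload) and finally to its reclaim policy.

package main // illustration only

import (
	"fmt"

	"github.com/vmware-tanzu/velero/pkg/volume"
)

func main() {
	// Each BackupMethod maps onto one pvCondition branch in restoreItem:
	//   NativeSnapshot  -> pvHasNativeSnapshot (restored via handlePVHasNativeSnapshot)
	//   PodVolumeBackup -> pvHasPodVolumeBackup (PV is dynamically re-provisioned)
	//   CSISnapshot     -> pvHasCSIVolumeSnapshot (covers CSI snapshots and snapshot data movement)
	volumeInfoMap := map[string]volume.VolumeInfo{
		"pv-native": {PVName: "pv-native", BackupMethod: volume.NativeSnapshot},
		"pv-fsb":    {PVName: "pv-fsb", BackupMethod: volume.PodVolumeBackup},
		"pv-csi":    {PVName: "pv-csi", BackupMethod: volume.CSISnapshot},
	}

	for name, info := range volumeInfoMap {
		fmt.Printf("%s backed up via %v\n", name, info.BackupMethod)
	}
}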
- _, err = remapClaimRefNS(ctx, obj) + } + } + + switch pvCondition { + case pvHasNativeSnapshot: + newPVName, err := ctx.handlePVHasNativeSnapshot(obj, resourceClient) if err != nil { errs.Add(namespace, err) return warnings, errs, itemExists } - var shouldRestoreSnapshot bool - if !shouldRenamePV { - // Check if the PV exists in the cluster before attempting to create - // a volume from the snapshot, in order to avoid orphaned volumes (GH #609) - shouldRestoreSnapshot, err = ctx.shouldRestore(name, resourceClient) - if err != nil { - errs.Add(namespace, errors.Wrapf(err, "error waiting on in-cluster persistentvolume %s", name)) - return warnings, errs, itemExists - } - } else { - // If we're renaming the PV, we're going to give it a new random name, - // so we can assume it doesn't already exist in the cluster and therefore - // we should proceed with restoring from snapshot. - shouldRestoreSnapshot = true - } - - if shouldRestoreSnapshot { - // Reset the PV's binding status so that Kubernetes can properly - // associate it with the restored PVC. - obj = resetVolumeBindingInfo(obj) - - // Even if we're renaming the PV, obj still has the old name here, because the pvRestorer - // uses the original name to look up metadata about the snapshot. - ctx.log.Infof("Restoring persistent volume from snapshot.") - updatedObj, err := ctx.pvRestorer.executePVAction(obj) - if err != nil { - errs.Add(namespace, fmt.Errorf("error executing PVAction for %s: %v", resourceID, err)) - return warnings, errs, itemExists - } - obj = updatedObj - - // VolumeSnapshotter has modified the PV name, we should rename the PV. - if oldName != obj.GetName() { - shouldRenamePV = true - } + if newPVName != "" { + name = newPVName } - if shouldRenamePV { - var pvName string - if oldName == obj.GetName() { - // pvRestorer hasn't modified the PV name, we need to rename the PV. - pvName, err = ctx.pvRenamer(oldName) - if err != nil { - errs.Add(namespace, errors.Wrapf(err, "error renaming PV")) - return warnings, errs, itemExists - } - } else { - // VolumeSnapshotter could have modified the PV name through - // function `SetVolumeID`, - pvName = obj.GetName() - } - - ctx.renamedPVs[oldName] = pvName - obj.SetName(pvName) - name = pvName - - // Add the original PV name as an annotation. - annotations := obj.GetAnnotations() - if annotations == nil { - annotations = map[string]string{} - } - annotations["velero.io/original-pv-name"] = oldName - obj.SetAnnotations(annotations) - } - - case hasPodVolumeBackup(obj, ctx): - ctx.log.WithFields(logrus.Fields{ - "namespace": obj.GetNamespace(), - "name": obj.GetName(), - "groupResource": groupResource.String(), - }).Infof("Dynamically re-provisioning persistent volume because it has a pod volume backup to be restored.") - ctx.pvsToProvision.Insert(name) - - // Return early because we don't want to restore the PV itself, we - // want to dynamically re-provision it. 
- return warnings, errs, itemExists - - case hasCSIVolumeSnapshot(ctx, obj): - ctx.log.WithFields(logrus.Fields{ - "namespace": obj.GetNamespace(), - "name": obj.GetName(), - "groupResource": groupResource.String(), - }).Infof("Dynamically re-provisioning persistent volume because it has a related CSI VolumeSnapshot.") + case pvHasPodVolumeBackup: + restoreLogger.Infof("Dynamically re-provisioning persistent volume because it has a pod volume backup to be restored.") ctx.pvsToProvision.Insert(name) - if ready, err := ctx.featureVerifier.Verify(velerov1api.CSIFeatureFlag); !ready { - ctx.log.Errorf("Failed to verify CSI modules, ready %v, err %v", ready, err) - errs.Add(namespace, fmt.Errorf("CSI modules are not ready for restore. Check CSI feature is enabled and CSI plugin is installed")) - } - // Return early because we don't want to restore the PV itself, we // want to dynamically re-provision it. return warnings, errs, itemExists - case hasSnapshotDataUpload(ctx, obj): - ctx.log.WithFields(logrus.Fields{ - "namespace": obj.GetNamespace(), - "name": obj.GetName(), - "groupResource": groupResource.String(), - }).Infof("Dynamically re-provisioning persistent volume because it has a related snapshot DataUpload.") + case pvHasCSIVolumeSnapshot: + restoreLogger.Infof("Dynamically re-provisioning persistent volume because it has a related snapshot DataUpload.") ctx.pvsToProvision.Insert(name) if ready, err := ctx.featureVerifier.Verify(velerov1api.CSIFeatureFlag); !ready { @@ -1341,24 +1291,16 @@ func (ctx *restoreContext) restoreItem(obj *unstructured.Unstructured, groupReso // want to dynamically re-provision it. return warnings, errs, itemExists - case hasDeleteReclaimPolicy(obj.Object): - ctx.log.WithFields(logrus.Fields{ - "namespace": obj.GetNamespace(), - "name": obj.GetName(), - "groupResource": groupResource.String(), - }).Infof("Dynamically re-provisioning persistent volume because it doesn't have a snapshot and its reclaim policy is Delete.") + case pvHasReclaimPolicyAsDelete: + restoreLogger.Infof("Dynamically re-provisioning persistent volume because it doesn't have a snapshot and its reclaim policy is Delete.") ctx.pvsToProvision.Insert(name) // Return early because we don't want to restore the PV itself, we // want to dynamically re-provision it. return warnings, errs, itemExists - default: - ctx.log.WithFields(logrus.Fields{ - "namespace": obj.GetNamespace(), - "name": obj.GetName(), - "groupResource": groupResource.String(), - }).Infof("Restoring persistent volume as-is because it doesn't have a snapshot and its reclaim policy is not Delete.") + case pvHasReclaimPolicyAsRetain: + restoreLogger.Infof("Restoring persistent volume as-is because it doesn't have a snapshot and its reclaim policy is not Delete.") // Check to see if the claimRef.namespace field needs to be remapped, and do so if necessary. _, err = remapClaimRefNS(ctx, obj) @@ -2464,3 +2406,82 @@ func (ctx *restoreContext) processUpdateResourcePolicy(fromCluster, fromClusterW } return warnings, errs } + +func (c *restoreContext) handlePVHasNativeSnapshot(obj *unstructured.Unstructured, resourceClient client.Dynamic) (newPVName string, err error) { + oldName := obj.GetName() + shouldRenamePV, err := shouldRenamePV(c, obj, resourceClient) + if err != nil { + return "", err + } + + // Check to see if the claimRef.namespace field needs to be remapped, + // and do so if necessary. 
+ _, err = remapClaimRefNS(c, obj) + if err != nil { + return "", err + } + + var shouldRestoreSnapshot bool + if !shouldRenamePV { + // Check if the PV exists in the cluster before attempting to create + // a volume from the snapshot, in order to avoid orphaned volumes (GH #609) + shouldRestoreSnapshot, err = c.shouldRestore(oldName, resourceClient) + if err != nil { + return "", errors.Wrapf(err, "error waiting on in-cluster persistentvolume %s", oldName) + } + } else { + // If we're renaming the PV, we're going to give it a new random name, + // so we can assume it doesn't already exist in the cluster and therefore + // we should proceed with restoring from snapshot. + shouldRestoreSnapshot = true + } + + if shouldRestoreSnapshot { + // Reset the PV's binding status so that Kubernetes can properly + // associate it with the restored PVC. + obj = resetVolumeBindingInfo(obj) + + // Even if we're renaming the PV, obj still has the old name here, because the pvRestorer + // uses the original name to look up metadata about the snapshot. + c.log.Infof("Restoring persistent volume from snapshot.") + updatedObj, err := c.pvRestorer.executePVAction(obj) + if err != nil { + return "", fmt.Errorf("error executing PVAction for %s: %v", getResourceID(kuberesource.PersistentVolumes, "", oldName), err) + } + obj = updatedObj + + // VolumeSnapshotter has modified the PV name, we should rename the PV. + if oldName != obj.GetName() { + shouldRenamePV = true + } + } + + if shouldRenamePV { + var pvName string + if oldName == obj.GetName() { + // pvRestorer hasn't modified the PV name, we need to rename the PV. + pvName, err = c.pvRenamer(oldName) + if err != nil { + return "", errors.Wrapf(err, "error renaming PV") + } + } else { + // VolumeSnapshotter could have modified the PV name through + // function `SetVolumeID`, + pvName = obj.GetName() + } + + c.renamedPVs[oldName] = pvName + obj.SetName(pvName) + newPVName = pvName + + // Add the original PV name as an annotation. 
+ annotations := obj.GetAnnotations() + if annotations == nil { + annotations = map[string]string{} + } + annotations["velero.io/original-pv-name"] = oldName + obj.SetAnnotations(annotations) + } + + return newPVName, nil +} diff --git a/pkg/restore/restore_test.go b/pkg/restore/restore_test.go index d2f86e3c785..3933d34bbff 100644 --- a/pkg/restore/restore_test.go +++ b/pkg/restore/restore_test.go @@ -61,6 +61,51 @@ import ( "github.com/vmware-tanzu/velero/pkg/volume" ) +/* +func TestRestorePVWithVolumeInfo(t *testing.T) { + tests := []struct { + name string + restore *velerov1api.Restore + backup *velerov1api.Backup + apiResources []*test.APIResource + tarball io.Reader + want map[*test.APIResource][]string + volumeInfoMap map[string]volume.VolumeInfo + }{ + {}, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + h := newHarness(t) + + for _, r := range tc.apiResources { + h.DiscoveryClient.WithAPIResource(r) + } + require.NoError(t, h.restorer.discoveryHelper.Refresh()) + + data := &Request{ + Log: h.log, + Restore: tc.restore, + Backup: tc.backup, + PodVolumeBackups: nil, + VolumeSnapshots: nil, + BackupReader: tc.tarball, + VolumeInfoMap: tc.volumeInfoMap, + } + warnings, errs := h.restorer.Restore( + data, + nil, // restoreItemActions + nil, // volume snapshotter getter + ) + + assertEmptyResults(t, warnings, errs) + assertAPIContents(t, h, tc.want) + }) + } +} +*/ + // TestRestoreResourceFiltering runs restores with different combinations // of resource filters (included/excluded resources, included/excluded // namespaces, label selectors, "include cluster resources" flag), and