Skip to content

Commit

Permalink
repo maintenance job out of repo manager
Browse files Browse the repository at this point in the history
Signed-off-by: Lyndon-Li <lyonghui@vmware.com>
  • Loading branch information
Lyndon-Li committed Jan 6, 2025
1 parent 912b116 commit db69829
Show file tree
Hide file tree
Showing 9 changed files with 773 additions and 578 deletions.
87 changes: 46 additions & 41 deletions pkg/cmd/cli/repomantenance/maintenance.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,28 +5,33 @@ import (
"fmt"
"os"
"strings"
"time"

"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/spf13/pflag"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/kubernetes"
"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/vmware-tanzu/velero/internal/credentials"
velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
velerocli "github.com/vmware-tanzu/velero/pkg/client"
"github.com/vmware-tanzu/velero/pkg/repository"
"github.com/vmware-tanzu/velero/pkg/repository/provider"
"github.com/vmware-tanzu/velero/pkg/util/filesystem"
"github.com/vmware-tanzu/velero/pkg/util/logging"

repokey "github.com/vmware-tanzu/velero/pkg/repository/keys"
repomanager "github.com/vmware-tanzu/velero/pkg/repository/manager"
)

// Options holds the settings of the repo maintenance command. Run and
// runRepoPrune are methods on it.
type Options struct {
// RepoName names the repository to maintain.
// NOTE(review): its use is outside the visible hunks — confirm.
RepoName string
// BackupStorageLocation is supplied as BackupLocation in the key used to
// look up the backup repository in runRepoPrune.
BackupStorageLocation string
// RepoType is supplied as RepositoryType in the same lookup key.
RepoType string
// ResourceTimeout is a duration setting.
// NOTE(review): where it is consumed is not visible here — confirm.
ResourceTimeout time.Duration
// LogLevelFlag is the log level flag from the velero logging util package.
LogLevelFlag *logging.LevelFlag
// FormatFlag is the log format flag from the velero logging util package.
FormatFlag *logging.FormatFlag
}
Expand Down Expand Up @@ -83,63 +88,80 @@ func (o *Options) Run(f velerocli.Factory) {
}
}

func (o *Options) initClient(f velerocli.Factory) (client.Client, error) {
func (o *Options) initClient(f velerocli.Factory) (client.Client, kubernetes.Interface, error) {
scheme := runtime.NewScheme()
err := velerov1api.AddToScheme(scheme)
if err != nil {
return nil, errors.Wrap(err, "failed to add velero scheme")
return nil, nil, errors.Wrap(err, "failed to add velero scheme")
}

err = v1.AddToScheme(scheme)
if err != nil {
return nil, errors.Wrap(err, "failed to add api core scheme")
return nil, nil, errors.Wrap(err, "failed to add api core scheme")
}

config, err := f.ClientConfig()
if err != nil {
return nil, errors.Wrap(err, "failed to get client config")
return nil, nil, errors.Wrap(err, "failed to get client config")
}

cli, err := client.New(config, client.Options{
Scheme: scheme,
})
if err != nil {
return nil, errors.Wrap(err, "failed to create client")
return nil, nil, errors.Wrap(err, "failed to create client")
}

kubeClient, err := f.KubeClient()
if err != nil {
return nil, nil, errors.Wrap(err, "failed to create kube client")
}

return cli, nil
return cli, kubeClient, nil
}

func (o *Options) runRepoPrune(f velerocli.Factory, namespace string, logger logrus.FieldLogger) error {
cli, err := o.initClient(f)
if err != nil {
return err
func initRepoManager(namespace string, kubeClient kubernetes.Interface, cli client.Client, logger logrus.FieldLogger) (repomanager.Manager, error) {
// ensure the repo key secret is set up
if err := repokey.EnsureCommonRepositoryKey(kubeClient.CoreV1(), namespace); err != nil {
return nil, errors.Wrap(err, "failed to ensure repository key")
}

repoLocker := repository.NewRepoLocker()

credentialFileStore, err := credentials.NewNamespacedFileStore(
cli,
namespace,
"/tmp/credentials",
filesystem.NewFileSystem(),
)
if err != nil {
return errors.Wrap(err, "failed to create namespaced file store")
return nil, errors.Wrap(err, "failed to create namespaced file store")
}

credentialSecretStore, err := credentials.NewNamespacedSecretStore(cli, namespace)
if err != nil {
return errors.Wrap(err, "failed to create namespaced secret store")
return nil, errors.Wrap(err, "failed to create namespaced secret store")
}

var repoProvider provider.Provider
if o.RepoType == velerov1api.BackupRepositoryTypeRestic {
repoProvider = provider.NewResticRepositoryProvider(credentialFileStore, filesystem.NewFileSystem(), logger)
} else {
repoProvider = provider.NewUnifiedRepoProvider(
credentials.CredentialGetter{
FromFile: credentialFileStore,
FromSecret: credentialSecretStore,
}, o.RepoType, logger)
return repomanager.NewManager(
namespace,
cli,
repoLocker,
credentialFileStore,
credentialSecretStore,
logger,
), nil
}

func (o *Options) runRepoPrune(f velerocli.Factory, namespace string, logger logrus.FieldLogger) error {
cli, kubeClient, err := o.initClient(f)
if err != nil {
return err
}

manager, err := initRepoManager(namespace, kubeClient, cli, logger)
if err != nil {
return err
}

// backupRepository
Expand All @@ -149,31 +171,14 @@ func (o *Options) runRepoPrune(f velerocli.Factory, namespace string, logger log
BackupLocation: o.BackupStorageLocation,
RepositoryType: o.RepoType,
}, true)

if err != nil {
return errors.Wrap(err, "failed to get backup repository")
}

// bsl
bsl := &velerov1api.BackupStorageLocation{}
err = cli.Get(context.Background(), client.ObjectKey{Namespace: namespace, Name: repo.Spec.BackupStorageLocation}, bsl)
if err != nil {
return errors.Wrap(err, "failed to get backup storage location")
}

para := provider.RepoParam{
BackupRepo: repo,
BackupLocation: bsl,
}

err = repoProvider.BoostRepoConnect(context.Background(), para)
if err != nil {
return errors.Wrap(err, "failed to boost repo connect")
}

err = repoProvider.PruneRepo(context.Background(), para)
err = manager.PruneRepo(repo)
if err != nil {
return errors.Wrap(err, "failed to prune repo")
}

return nil
}
13 changes: 6 additions & 7 deletions pkg/cmd/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -491,15 +491,9 @@ func (s *server) initRepoManager() error {
s.namespace,
s.mgr.GetClient(),
s.repoLocker,
s.repoEnsurer,
s.credentialFileStore,
s.credentialSecretStore,
s.config.RepoMaintenanceJobConfig,
s.config.PodResources,
s.config.KeepLatestMaintenanceJobs,
s.logger,
s.logLevel,
s.config.LogFormat,
)

return nil
Expand Down Expand Up @@ -720,9 +714,14 @@ func (s *server) runControllers(defaultVolumeSnapshotLocations map[string]string
s.namespace,
s.logger,
s.mgr.GetClient(),
s.repoManager,
s.config.RepoMaintenanceFrequency,
s.config.BackupRepoConfig,
s.repoManager,
s.config.KeepLatestMaintenanceJobs,
s.config.RepoMaintenanceJobConfig,
s.config.PodResources,
s.logLevel,
s.config.LogFormat,
).SetupWithManager(s.mgr); err != nil {
s.logger.Fatal(err, "unable to create controller", "controller", constant.ControllerBackupRepo)
}
Expand Down
75 changes: 55 additions & 20 deletions pkg/controller/backup_repository_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ import (
repoconfig "github.com/vmware-tanzu/velero/pkg/repository/config"
repomanager "github.com/vmware-tanzu/velero/pkg/repository/manager"
"github.com/vmware-tanzu/velero/pkg/util/kube"
"github.com/vmware-tanzu/velero/pkg/util/logging"
)

const (
Expand All @@ -55,16 +56,22 @@ const (

type BackupRepoReconciler struct {
client.Client
namespace string
logger logrus.FieldLogger
clock clocks.WithTickerAndDelayedExecution
maintenanceFrequency time.Duration
backupRepoConfig string
repositoryManager repomanager.Manager
namespace string
logger logrus.FieldLogger
clock clocks.WithTickerAndDelayedExecution
maintenanceFrequency time.Duration
backupRepoConfig string
repositoryManager repomanager.Manager
keepLatestMaintenanceJobs int
repoMaintenanceConfig string
podResources kube.PodResources
logLevel logrus.Level
logFormat *logging.FormatFlag
}

func NewBackupRepoReconciler(namespace string, logger logrus.FieldLogger, client client.Client,
maintenanceFrequency time.Duration, backupRepoConfig string, repositoryManager repomanager.Manager) *BackupRepoReconciler {
func NewBackupRepoReconciler(namespace string, logger logrus.FieldLogger, client client.Client, repositoryManager repomanager.Manager,
maintenanceFrequency time.Duration, backupRepoConfig string, keepLatestMaintenanceJobs int, repoMaintenanceConfig string, podResources kube.PodResources,
logLevel logrus.Level, logFormat *logging.FormatFlag) *BackupRepoReconciler {
c := &BackupRepoReconciler{
client,
namespace,
Expand All @@ -73,6 +80,11 @@ func NewBackupRepoReconciler(namespace string, logger logrus.FieldLogger, client
maintenanceFrequency,
backupRepoConfig,
repositoryManager,
keepLatestMaintenanceJobs,
repoMaintenanceConfig,
podResources,
logLevel,
logFormat,
}

return c
Expand Down Expand Up @@ -212,7 +224,13 @@ func (r *BackupRepoReconciler) Reconcile(ctx context.Context, req ctrl.Request)
return ctrl.Result{}, errors.Wrap(err, "error handling incomplete repo maintenance jobs")
}

return ctrl.Result{}, r.runMaintenanceIfDue(ctx, backupRepo, log)
if err := r.runMaintenanceIfDue(ctx, backupRepo, log); err != nil {
return ctrl.Result{}, errors.Wrap(err, "error check and run repo maintenance jobs")
}

if err := repository.DeleteOldMaintenanceJobs(r.Client, req.Name, r.keepLatestMaintenanceJobs); err != nil {
log.WithError(err).Warn("Failed to delete old maintenance jobs")
}
}

return ctrl.Result{}, nil
Expand Down Expand Up @@ -306,7 +324,7 @@ func ensureRepo(repo *velerov1api.BackupRepository, repoManager repomanager.Mana
}

func (r *BackupRepoReconciler) recallMaintenance(ctx context.Context, req *velerov1api.BackupRepository, log logrus.FieldLogger) error {
history, err := repository.WaitIncompleteMaintenance(ctx, r.Client, req, defaultMaintenanceStatusQueueLength, log)
history, err := repository.WaitAllMaintenanceJobComplete(ctx, r.Client, req, defaultMaintenanceStatusQueueLength, log)
if err != nil {
return errors.Wrapf(err, "error waiting incomplete repo maintenance job for repo %s", req.Name)
}
Expand Down Expand Up @@ -380,7 +398,11 @@ func getLastMaintenanceTimeFromHistory(history []velerov1api.BackupRepositoryMai
time := history[0].CompleteTimestamp

for i := range history {
if time.Before(history[i].CompleteTimestamp) {
if history[i].CompleteTimestamp == nil {
continue
}

if time == nil || time.Before(history[i].CompleteTimestamp) {
time = history[i].CompleteTimestamp
}
}
Expand All @@ -406,8 +428,13 @@ func isEarlierMaintenanceStatus(a, b velerov1api.BackupRepositoryMaintenanceStat
return a.StartTimestamp.Before(b.StartTimestamp)
}

// funcStartMaintenanceJob and funcWaitMaintenanceJobComplete are package-level
// indirections to the corresponding functions of the repository package.
// runMaintenanceIfDue calls through these variables rather than calling the
// repository functions directly.
// NOTE(review): presumably this lets tests substitute them — confirm.
var funcStartMaintenanceJob = repository.StartMaintenanceJob
var funcWaitMaintenanceJobComplete = repository.WaitMaintenanceJobComplete

func (r *BackupRepoReconciler) runMaintenanceIfDue(ctx context.Context, req *velerov1api.BackupRepository, log logrus.FieldLogger) error {
if !dueForMaintenance(req, r.clock.Now()) {
startTime := r.clock.Now()

if !dueForMaintenance(req, startTime) {
log.Debug("not due for maintenance")
return nil
}
Expand All @@ -418,31 +445,39 @@ func (r *BackupRepoReconciler) runMaintenanceIfDue(ctx context.Context, req *vel
// should not cause the repo to move to `NotReady`.
log.Debug("Pruning repo")

// when PruneRepo fails, the maintenance result will be left temporarily
job, err := funcStartMaintenanceJob(r.Client, ctx, req, r.repoMaintenanceConfig, r.podResources, r.logLevel, r.logFormat, log)
if err != nil {
log.WithError(err).Warn("Starting repo maintenance failed")
return r.patchBackupRepository(ctx, req, func(rr *velerov1api.BackupRepository) {
updateRepoMaintenanceHistory(rr, velerov1api.BackupRepositoryMaintenanceFailed, &metav1.Time{Time: startTime}, nil, fmt.Sprintf("Failed to start maintenance job, err: %v", err))
})
}

// when WaitMaintenanceJobComplete fails, the maintenance result will be left temporarily
// If the maintenance still completes later, recallMaintenance recalls the leftover ones and updates LastMaintenanceTime and history

Check failure on line 457 in pkg/controller/backup_repository_controller.go

View workflow job for this annotation

GitHub Actions / Run Codespell

onces ==> ounces, once, ones
status, err := r.repositoryManager.PruneRepo(req)
status, err := funcWaitMaintenanceJobComplete(r.Client, ctx, job, r.namespace, log)
if err != nil {
return errors.Wrapf(err, "error pruning repository")
return errors.Wrapf(err, "error waiting repo maintenance completion status")
}

if status.Result == velerov1api.BackupRepositoryMaintenanceFailed {
log.WithError(err).Warn("Pruning repository failed")
return r.patchBackupRepository(ctx, req, func(rr *velerov1api.BackupRepository) {
updateRepoMaintenanceHistory(rr, velerov1api.BackupRepositoryMaintenanceFailed, status.StartTimestamp.Time, status.CompleteTimestamp.Time, status.Message)
updateRepoMaintenanceHistory(rr, velerov1api.BackupRepositoryMaintenanceFailed, status.StartTimestamp, status.CompleteTimestamp, status.Message)
})
}

return r.patchBackupRepository(ctx, req, func(rr *velerov1api.BackupRepository) {
rr.Status.LastMaintenanceTime = &metav1.Time{Time: status.CompleteTimestamp.Time}
updateRepoMaintenanceHistory(rr, velerov1api.BackupRepositoryMaintenanceSucceeded, status.StartTimestamp.Time, status.CompleteTimestamp.Time, status.Message)
updateRepoMaintenanceHistory(rr, velerov1api.BackupRepositoryMaintenanceSucceeded, status.StartTimestamp, status.CompleteTimestamp, status.Message)
})
}

func updateRepoMaintenanceHistory(repo *velerov1api.BackupRepository, result velerov1api.BackupRepositoryMaintenanceResult, startTime time.Time, completionTime time.Time, message string) {
func updateRepoMaintenanceHistory(repo *velerov1api.BackupRepository, result velerov1api.BackupRepositoryMaintenanceResult, startTime, completionTime *metav1.Time, message string) {
latest := velerov1api.BackupRepositoryMaintenanceStatus{
Result: result,
StartTimestamp: &metav1.Time{Time: startTime},
CompleteTimestamp: &metav1.Time{Time: completionTime},
StartTimestamp: startTime,
CompleteTimestamp: completionTime,
Message: message,
}

Expand Down
Loading

0 comments on commit db69829

Please sign in to comment.