Skip to content

Commit

Permalink
Add perf test namespace mapping when restore
Browse files Browse the repository at this point in the history
Signed-off-by: Ming <mqiu@vmware.com>
  • Loading branch information
qiuming-best committed Nov 14, 2023
1 parent 6b7ce66 commit ce6c563
Show file tree
Hide file tree
Showing 13 changed files with 168 additions and 41 deletions.
7 changes: 7 additions & 0 deletions pkg/cmd/cli/install/install.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ type Options struct {
UseVolumeSnapshots bool
DefaultRepoMaintenanceFrequency time.Duration
GarbageCollectionFrequency time.Duration
PodVolumeOperationTimeout time.Duration
Plugins flag.StringArray
NoDefaultBackupLocation bool
CRDsOnly bool
Expand Down Expand Up @@ -116,6 +117,7 @@ func (o *Options) BindFlags(flags *pflag.FlagSet) {
flags.BoolVar(&o.Wait, "wait", o.Wait, "Wait for Velero deployment to be ready. Optional.")
flags.DurationVar(&o.DefaultRepoMaintenanceFrequency, "default-repo-maintain-frequency", o.DefaultRepoMaintenanceFrequency, "How often 'maintain' is run for backup repositories by default. Optional.")
flags.DurationVar(&o.GarbageCollectionFrequency, "garbage-collection-frequency", o.GarbageCollectionFrequency, "How often the garbage collection runs for expired backups.(default 1h)")
flags.DurationVar(&o.PodVolumeOperationTimeout, "pod-volume-operation-timeout", o.PodVolumeOperationTimeout, "How long to wait for pod volume operations to complete before timing out(default 4h). Optional.")
flags.Var(&o.Plugins, "plugins", "Plugin container images to install into the Velero Deployment")
flags.BoolVar(&o.CRDsOnly, "crds-only", o.CRDsOnly, "Only generate CustomResourceDefinition resources. Useful for updating CRDs for an existing Velero install.")
flags.StringVar(&o.CACertFile, "cacert", o.CACertFile, "File containing a certificate bundle to use when verifying TLS connections to the object store. Optional.")
Expand Down Expand Up @@ -209,6 +211,7 @@ func (o *Options) AsVeleroOptions() (*install.VeleroOptions, error) {
VSLConfig: o.VolumeSnapshotConfig.Data(),
DefaultRepoMaintenanceFrequency: o.DefaultRepoMaintenanceFrequency,
GarbageCollectionFrequency: o.GarbageCollectionFrequency,
PodVolumeOperationTimeout: o.PodVolumeOperationTimeout,
Plugins: o.Plugins,
NoDefaultBackupLocation: o.NoDefaultBackupLocation,
CACertData: caCertData,
Expand Down Expand Up @@ -426,5 +429,9 @@ func (o *Options) Validate(c *cobra.Command, args []string, f client.Factory) er
return errors.New("--garbage-collection-frequency must be non-negative")
}

if o.PodVolumeOperationTimeout < 0 {
return errors.New("--pod-volume-operation-timeout must be non-negative")
}

return nil
}
11 changes: 11 additions & 0 deletions pkg/install/deployment.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ type podTemplateConfig struct {
withSecret bool
defaultRepoMaintenanceFrequency time.Duration
garbageCollectionFrequency time.Duration
podVolumeOperationTimeout time.Duration
plugins []string
features []string
defaultVolumesToFsBackup bool
Expand Down Expand Up @@ -115,6 +116,12 @@ func WithGarbageCollectionFrequency(val time.Duration) podTemplateOption {
}
}

// WithPodVolumeOperationTimeout returns a podTemplateOption that stores val in
// the podVolumeOperationTimeout field of the pod template configuration it is
// applied to.
func WithPodVolumeOperationTimeout(val time.Duration) podTemplateOption {
	setTimeout := func(cfg *podTemplateConfig) {
		cfg.podVolumeOperationTimeout = val
	}
	return setTimeout
}

func WithPlugins(plugins []string) podTemplateOption {
return func(c *podTemplateConfig) {
c.plugins = plugins
Expand Down Expand Up @@ -212,6 +219,10 @@ func Deployment(namespace string, opts ...podTemplateOption) *appsv1.Deployment
args = append(args, fmt.Sprintf("--garbage-collection-frequency=%v", c.garbageCollectionFrequency))
}

if c.podVolumeOperationTimeout > 0 {
args = append(args, fmt.Sprintf("--pod-volume-operation-timeout=%v", c.podVolumeOperationTimeout))
}

Check warning on line 224 in pkg/install/deployment.go

View check run for this annotation

Codecov / codecov/patch

pkg/install/deployment.go#L223-L224

Added lines #L223 - L224 were not covered by tests

deployment := &appsv1.Deployment{
ObjectMeta: objectMeta(namespace, "velero"),
TypeMeta: metav1.TypeMeta{
Expand Down
2 changes: 2 additions & 0 deletions pkg/install/resources.go
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,7 @@ type VeleroOptions struct {
VSLConfig map[string]string
DefaultRepoMaintenanceFrequency time.Duration
GarbageCollectionFrequency time.Duration
PodVolumeOperationTimeout time.Duration
Plugins []string
NoDefaultBackupLocation bool
CACertData []byte
Expand Down Expand Up @@ -335,6 +336,7 @@ func AllResources(o *VeleroOptions) *unstructured.UnstructuredList {
WithDefaultRepoMaintenanceFrequency(o.DefaultRepoMaintenanceFrequency),
WithServiceAccountName(serviceAccountName),
WithGarbageCollectionFrequency(o.GarbageCollectionFrequency),
WithPodVolumeOperationTimeout(o.PodVolumeOperationTimeout),
WithUploaderType(o.UploaderType),
}

Expand Down
24 changes: 23 additions & 1 deletion test/perf/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,17 @@ NFS_SERVER_PATH ?=
UPLOADER_TYPE ?=
TEST_CASE_DESCRIBE ?= 'velero performance test'
BACKUP_FOR_RESTORE ?=
Delete_Cluster_Resource ?= false
Debug_Velero_Pod_Restart ?= false
NODE_AGENT_POD_CPU_LIMIT ?=
NODE_AGENT_POD_MEM_LIMIT ?=
NODE_AGENT_POD_CPU_REQUEST ?=
NODE_AGENT_POD_MEM_REQUEST ?=
VELERO_POD_CPU_LIMIT ?=
VELERO_POD_MEM_LIMIT ?=
VELERO_POD_CPU_REQUEST ?=
VELERO_POD_MEM_REQUEST ?=
POD_VOLUME_OPERATION_TIMEOUT ?=

.PHONY:ginkgo
ginkgo: # Make sure ginkgo is in $GOPATH/bin
Expand Down Expand Up @@ -110,7 +121,18 @@ run: ginkgo
-uploader-type=$(UPLOADER_TYPE) \
-nfs-server-path=$(NFS_SERVER_PATH) \
-test-case-describe=$(TEST_CASE_DESCRIBE) \
-backup-for-restore=$(BACKUP_FOR_RESTORE)
-backup-for-restore=$(BACKUP_FOR_RESTORE) \
--delete-cluster-resource=$(Delete_Cluster_Resource) \
--debug-velero-pod-restart=$(Debug_Velero_Pod_Restart) \
--node-agent-pod-cpu-limit=$(NODE_AGENT_POD_CPU_LIMIT) \
--node-agent-pod-mem-limit=$(NODE_AGENT_POD_MEM_LIMIT) \
--node-agent-pod-cpu-request=$(NODE_AGENT_POD_CPU_REQUEST) \
--node-agent-pod-mem-request=$(NODE_AGENT_POD_MEM_REQUEST) \
--velero-pod-cpu-limit=$(VELERO_POD_CPU_LIMIT) \
--velero-pod-mem-limit=$(VELERO_POD_MEM_LIMIT) \
--velero-pod-cpu-request=$(VELERO_POD_CPU_REQUEST) \
--velero-pod-mem-request=$(VELERO_POD_MEM_REQUEST) \
--pod-volume-operation-timeout=$(POD_VOLUME_OPERATION_TIMEOUT)

build: ginkgo
mkdir -p $(OUTPUT_DIR)
Expand Down
2 changes: 1 addition & 1 deletion test/perf/backup/backup.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ type BackupTest struct {

func (b *BackupTest) Init() error {
b.TestCase.Init()
b.Ctx, b.CtxCancel = context.WithTimeout(context.Background(), 1*time.Hour)
b.Ctx, b.CtxCancel = context.WithTimeout(context.Background(), 6*time.Hour)
b.CaseBaseName = "backup"
b.BackupName = "backup-" + b.CaseBaseName + "-" + b.UUIDgen

Expand Down
18 changes: 15 additions & 3 deletions test/perf/basic/basic.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,14 @@ package basic

import (
"context"
"fmt"
"strings"
"time"

"github.com/pkg/errors"

. "github.com/vmware-tanzu/velero/test"
. "github.com/vmware-tanzu/velero/test/perf/test"
"github.com/vmware-tanzu/velero/test/util/k8s"
)

type BasicTest struct {
Expand All @@ -32,7 +34,7 @@ type BasicTest struct {

func (b *BasicTest) Init() error {
b.TestCase.Init()
b.Ctx, b.CtxCancel = context.WithTimeout(context.Background(), 1*time.Hour)
b.Ctx, b.CtxCancel = context.WithTimeout(context.Background(), 6*time.Hour)
b.CaseBaseName = "backuprestore"
b.BackupName = "backup-" + b.CaseBaseName + "-" + b.UUIDgen
b.RestoreName = "restore-" + b.CaseBaseName + "-" + b.UUIDgen
Expand All @@ -49,10 +51,20 @@ func (b *BasicTest) Init() error {
"--from-backup", b.BackupName, "--wait",
}

if !VeleroCfg.DeleteClusterResource {
joinedNsMapping, err := k8s.GetMappingNamespaces(b.Ctx, b.Client, *b.NSExcluded)
if err != nil {
return errors.Wrapf(err, "failed to get mapping namespaces in init")
}

b.RestoreArgs = append(b.RestoreArgs, "--namespace-mappings")
b.RestoreArgs = append(b.RestoreArgs, joinedNsMapping)
}

b.TestMsg = &TestMSG{
Desc: "Do backup and restore resources for performance test",
FailedMSG: "Failed to backup and restore resources",
Text: fmt.Sprintf("Should backup and restore resources success"),
Text: "Should backup and restore resources success",
}
return nil
}
12 changes: 12 additions & 0 deletions test/perf/e2e_suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"flag"
"fmt"
"testing"
"time"

. "github.com/onsi/ginkgo"
"github.com/onsi/ginkgo/reporters"
Expand Down Expand Up @@ -56,6 +57,15 @@ func init() {
flag.BoolVar(&VeleroCfg.InstallVelero, "install-velero", true, "install/uninstall velero during the test. Optional.")
flag.BoolVar(&VeleroCfg.UseNodeAgent, "use-node-agent", true, "whether deploy node agent daemonset velero during the test. Optional.")
flag.StringVar(&VeleroCfg.RegistryCredentialFile, "registry-credential-file", "", "file containing credential for the image registry, follows the same format rules as the ~/.docker/config.json file. Optional.")
flag.StringVar(&VeleroCfg.NodeAgentPodCPULimit, "node-agent-pod-cpu-limit", "4", "CPU limit for node agent pod. Optional.")
flag.StringVar(&VeleroCfg.NodeAgentPodMemLimit, "node-agent-pod-mem-limit", "4Gi", "Memory limit for node agent pod. Optional.")
flag.StringVar(&VeleroCfg.NodeAgentPodCPURequest, "node-agent-pod-cpu-request", "2", "CPU request for node agent pod. Optional.")
flag.StringVar(&VeleroCfg.NodeAgentPodMemRequest, "node-agent-pod-mem-request", "2Gi", "Memory request for node agent pod. Optional.")
flag.StringVar(&VeleroCfg.VeleroPodCPULimit, "velero-pod-cpu-limit", "4", "CPU limit for velero pod. Optional.")
flag.StringVar(&VeleroCfg.VeleroPodMemLimit, "velero-pod-mem-limit", "4Gi", "Memory limit for velero pod. Optional.")
flag.StringVar(&VeleroCfg.VeleroPodCPURequest, "velero-pod-cpu-request", "2", "CPU request for velero pod. Optional.")
flag.StringVar(&VeleroCfg.VeleroPodMemRequest, "velero-pod-mem-request", "2Gi", "Memory request for velero pod. Optional.")
flag.DurationVar(&VeleroCfg.PodVolumeOperationTimeout, "pod-volume-operation-timeout", 360*time.Minute, "Timeout for pod volume operations. Optional.")
//vmware-tanzu-experiments
flag.StringVar(&VeleroCfg.Features, "features", "", "Comma-separated list of features to enable for this Velero process.")
flag.StringVar(&VeleroCfg.DefaultCluster, "default-cluster-context", "", "Default cluster context for migration test.")
Expand All @@ -65,6 +75,8 @@ func init() {
flag.StringVar(&VeleroCfg.NFSServerPath, "nfs-server-path", "", "the path of nfs server")
flag.StringVar(&VeleroCfg.TestCaseDescribe, "test-case-describe", "velero performance test", "the description for the current test")
flag.StringVar(&VeleroCfg.BackupForRestore, "backup-for-restore", "", "the name of backup for restore")
flag.BoolVar(&VeleroCfg.DeleteClusterResource, "delete-cluster-resource", false, "delete cluster resource after test")
flag.BoolVar(&VeleroCfg.DebugVeleroPodRestart, "debug-velero-pod-restart", false, "Switch for debugging velero pod restart.")
}

func initConfig() error {
Expand Down
40 changes: 21 additions & 19 deletions test/perf/metrics/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,30 +40,32 @@ type PodMetrics struct {

func (p *PodMetrics) Update() error {
cpu, mem, err := metrics.GetPodUsageMetrics(p.Ctx, p.Client, p.PodName, p.Namespace)
// TODO
fmt.Printf("Pod %s/%s metrics update", p.Namespace, p.PodName)
if err != nil {
return errors.WithStack(err)
} else {
keyMaxCPU := p.PodName + ":MaxCPU"
curCPU := cpu.MilliValue()
if curCPU > p.Metrics[keyMaxCPU] {
p.Metrics[keyMaxCPU] = curCPU
}
}
keyMaxCPU := p.PodName + ":MaxCPU"
curCPU := cpu.MilliValue()
if curCPU > p.Metrics[keyMaxCPU] {
p.Metrics[keyMaxCPU] = curCPU
}

keyMaxMem := p.PodName + ":MaxMemory"
curMem := mem.MilliValue()
if curMem > p.Metrics[keyMaxMem] {
p.Metrics[keyMaxMem] = curMem
}
keyMaxMem := p.PodName + ":MaxMemory"
curMem := mem.MilliValue()
if curMem > p.Metrics[keyMaxMem] {
p.Metrics[keyMaxMem] = curMem
}

keyAvgCPU := p.PodName + ":AverageCPU"
preAvgCPU := p.Metrics[keyAvgCPU]
p.Metrics[keyAvgCPU] = (preAvgCPU*p.count + curCPU) / (p.count + 1)
keyAvgCPU := p.PodName + ":AverageCPU"
preAvgCPU := p.Metrics[keyAvgCPU]
p.Metrics[keyAvgCPU] = (preAvgCPU*p.count + curCPU) / (p.count + 1)

keyAvgMem := p.PodName + ":AverageMemory"
preAvgMem := p.Metrics[keyAvgMem]
p.Metrics[keyAvgMem] = (preAvgMem*p.count + curMem) / (p.count + 1)
p.count++

keyAvgMem := p.PodName + ":AverageMemory"
preAvgMem := p.Metrics[keyAvgMem]
p.Metrics[keyAvgMem] = (preAvgMem*p.count + curMem) / (p.count + 1)
p.count++
}
return nil
}

Expand Down
20 changes: 18 additions & 2 deletions test/perf/restore/restore.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (

. "github.com/vmware-tanzu/velero/test"
. "github.com/vmware-tanzu/velero/test/perf/test"
"github.com/vmware-tanzu/velero/test/util/k8s"
. "github.com/vmware-tanzu/velero/test/util/velero"
)

Expand All @@ -34,7 +35,7 @@ type RestoreTest struct {

func (r *RestoreTest) Init() error {
r.TestCase.Init()
r.Ctx, r.CtxCancel = context.WithTimeout(context.Background(), 1*time.Hour)
r.Ctx, r.CtxCancel = context.WithTimeout(context.Background(), 6*time.Hour)
r.CaseBaseName = "restore"
r.RestoreName = "restore-" + r.CaseBaseName + "-" + r.UUIDgen

Expand All @@ -43,7 +44,7 @@ func (r *RestoreTest) Init() error {
FailedMSG: "Failed to restore resources",
Text: fmt.Sprintf("Should restore resources success"),
}
return r.clearUpResourcesBeforRestore()
return nil
}

func (r *RestoreTest) clearUpResourcesBeforRestore() error {
Expand All @@ -52,6 +53,11 @@ func (r *RestoreTest) clearUpResourcesBeforRestore() error {
}

func (r *RestoreTest) Restore() error {
// we need to clear up all resources before do the restore test
err := r.clearUpResourcesBeforRestore()
if err != nil {
return errors.Wrapf(err, "failed to clear up resources before do the restore test")
}
var backupName string
if VeleroCfg.BackupForRestore != "" {
backupName = VeleroCfg.BackupForRestore
Expand All @@ -71,6 +77,16 @@ func (r *RestoreTest) Restore() error {
"--from-backup", r.BackupName, "--wait",
}

if !VeleroCfg.DeleteClusterResource {
joinedNsMapping, err := k8s.GetMappingNamespaces(r.Ctx, r.Client, *r.NSExcluded)
if err != nil {
return errors.Wrapf(err, "failed to get mapping namespaces in init")
}

r.RestoreArgs = append(r.RestoreArgs, "--namespace-mappings")
r.RestoreArgs = append(r.RestoreArgs, joinedNsMapping)
}

return r.TestCase.Restore()
}
func (r *RestoreTest) Destroy() error {
Expand Down
14 changes: 8 additions & 6 deletions test/perf/test/test.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ func TestFunc(test VeleroBackupRestoreTest) func() {
}

func (t *TestCase) Init() error {
t.Ctx, t.CtxCancel = context.WithTimeout(context.Background(), 1*time.Hour)
t.Ctx, t.CtxCancel = context.WithTimeout(context.Background(), 6*time.Hour)
t.NSExcluded = &[]string{"kube-system", "velero", "default", "kube-public", "kube-node-lease"}
t.UUIDgen = t.GenerateUUID()
t.Client = *VeleroCfg.DefaultClient
Expand Down Expand Up @@ -131,10 +131,12 @@ func (t *TestCase) Backup() error {
}

// Destroy tears down the resources created by the test case, but only when
// VeleroCfg.DeleteClusterResource is set. When the flag is false nothing is
// removed and nil is returned immediately.
//
// When enabled, it calls CleanupNamespacesFiterdByExcludes with the
// t.NSExcluded list and then ClearClaimRefForFailedPVs. Failures are reported
// through the Expect assertions rather than the return value, so this method
// always returns nil.
func (t *TestCase) Destroy() error {
	if !VeleroCfg.DeleteClusterResource {
		return nil
	}

	By(fmt.Sprintf("Start to destroy namespace %s......", t.CaseBaseName), func() {
		Expect(CleanupNamespacesFiterdByExcludes(t.GetTestCase().Ctx, t.Client, *t.NSExcluded)).To(Succeed(), "Could cleanup retrieve namespaces")
		Expect(ClearClaimRefForFailedPVs(t.Ctx, t.Client)).To(Succeed(), "Failed to make PV status become to available")
	})
	return nil
}

Expand All @@ -160,7 +162,7 @@ func (t *TestCase) Verify() error {
}

func (t *TestCase) Clean() error {
if !VeleroCfg.Debug {
if !VeleroCfg.Debug || VeleroCfg.DeleteClusterResource {
By("Clean backups and restore after test", func() {
if len(t.BackupArgs) != 0 {
if err := VeleroBackupDelete(t.Ctx, VeleroCfg.VeleroCLI, VeleroCfg.VeleroNamespace, t.BackupName); err != nil {
Expand Down
Loading

0 comments on commit ce6c563

Please sign in to comment.